; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

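; The tests below exercise the x86 AVX512VL mask intrinsics for broadcasts,
; *dup shuffles, and immediate permutes. Each operation is checked in three
; forms: unmasked (mask = -1), merge-masked ({%k1}), and zero-masked
; ({%k1} {z}). The GPR-broadcast tests sum the three results so that all
; three forms survive into the checked codegen.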
declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa32 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc1]
; X86-NEXT:    vmovdqa32 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0xd1]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd7]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}


declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
; X86-NEXT:    vmovdqa64 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xd1]
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %xmm1 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xd7]
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer, i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}


declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x58,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa32 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc1]
; X86-NEXT:    vmovdqa32 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0xd1]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd7]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastq {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc1]
; X86-NEXT:    vmovdqa64 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0xd1]
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %ymm1 # encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xd7]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer, i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

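; Register-source broadcasts. With no mask in play the broadcast compresses
; to the VEX-encoded vbroadcastss, and in test_int_x86_avx512_pbroadcastd_256
; the scalar load from %y_ptr folds into a single vbroadcastss from memory.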
declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i32* %y_ptr) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vbroadcastss (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %y_32 = load i32, i32* %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32* %y_ptr) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %y_32 = load i32, i32* %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_pbroadcastd_256(<4 x i32> %x0, i8 %mask, i32* %y_ptr) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %y_32 = load i32, i32* %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_pbroadcastd_128(<4 x i32> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_mask_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_maskz_pbroadcastq_256(<2 x i64> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastq_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

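; For the unmasked 128-bit qword broadcast, codegen emits vmovddup instead of
; vpbroadcastq: duplicating the low 64 bits is the same shuffle.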
declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; CHECK-NEXT:    # xmm0 = xmm0[0,0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_pbroadcastq_128(<2 x i64> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pbroadcastq_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pbroadcastq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

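; Floating-point broadcasts: vbroadcastsd/vbroadcastss with merge and zero
; masking.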
declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> undef, i8 -1)
  ret <4 x double> %res
}

define <4 x double> @test_x86_mask_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask) {
; X86-LABEL: test_x86_mask_vbroadcast_sd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_mask_vbroadcast_sd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
  ret <4 x double> %res
}

define <4 x double> @test_x86_maskz_vbroadcast_sd_pd_256(<2 x double> %a0, i8 %mask) {
; X86-LABEL: test_x86_maskz_vbroadcast_sd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_maskz_vbroadcast_sd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}

declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_x86_mask_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_x86_mask_vbroadcast_ss_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_mask_vbroadcast_ss_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_x86_maskz_vbroadcast_ss_ps_256(<4 x float> %a0, i8 %mask) {
; X86-LABEL: test_x86_maskz_vbroadcast_ss_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_maskz_vbroadcast_ss_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> undef, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_x86_mask_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_x86_mask_vbroadcast_ss_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_mask_vbroadcast_ss_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_x86_maskz_vbroadcast_ss_ps_128(<4 x float> %a0, i8 %mask) {
; X86-LABEL: test_x86_maskz_vbroadcast_ss_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_maskz_vbroadcast_ss_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

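; movsldup/movshdup/movddup are fixed shuffles duplicating the even, odd, or
; low elements respectively; the asm printer emits the shuffle mask (e.g.
; xmm0[0,0,2,2]) as a trailing comment line, which the checks also match.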
declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_movsldup_128(<4 x float> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movsldup_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovsldup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xc0]
; CHECK-NEXT:    # xmm0 = xmm0[0,0,2,2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0,0,2,2]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0,0,2,2]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_maskz_movsldup_128(<4 x float> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movsldup_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movsldup_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_movsldup_256(<8 x float> %x0, <8 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movsldup_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovsldup %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xc0]
; CHECK-NEXT:    # ymm0 = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_maskz_movsldup_256(<8 x float> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movsldup_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movsldup_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_movshdup_128(<4 x float> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movshdup_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovshdup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xc0]
; CHECK-NEXT:    # xmm0 = xmm0[1,1,3,3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[1,1,3,3]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[1,1,3,3]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_maskz_movshdup_128(<4 x float> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movshdup_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movshdup_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_movshdup_256(<8 x float> %x0, <8 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movshdup_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovshdup %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xc0]
; CHECK-NEXT:    # ymm0 = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X86-NEXT:    # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X64-NEXT:    # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_maskz_movshdup_256(<8 x float> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movshdup_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movshdup_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  ret <8 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_movddup_128(<2 x double> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movddup_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0]
; CHECK-NEXT:    # xmm0 = xmm0[0,0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0,0]
; X86-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0,0]
; X64-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_maskz_movddup_128(<2 x double> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movddup_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movddup_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_movddup_256(<4 x double> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_movddup_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xc0]
; CHECK-NEXT:    # ymm0 = ymm0[0,0,2,2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2]
; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2]
; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_maskz_movddup_256(<4 x double> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_movddup_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_movddup_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
  ret <4 x double> %res
}

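; Immediate vpermilpd/vpermilps. Only the low immediate bits are encoded:
; the 256-bit pd form uses one select bit per element, so i32 22 (0b10110)
; truncates to $6 (0b0110), i.e. ymm0[0,1,3,2]; the ps forms use a 2-bit
; selector per element within each 128-bit lane, so $22 gives [2,1,1,0].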
declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x06]
; CHECK-NEXT:    # ymm0 = ymm0[0,1,3,2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,1,3,2]
; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,1,3,2]
; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_maskz_vpermil_pd_256(<4 x double> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermil_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,3,2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermil_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,3,2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
; CHECK-NEXT:    # xmm0 = xmm0[1,0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; X86-NEXT:    # xmm1 {%k1} = xmm0[1,0]
; X86-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; X64-NEXT:    # xmm1 {%k1} = xmm0[1,0]
; X64-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_maskz_vpermil_pd_128(<2 x double> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermil_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[1,0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermil_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[1,0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
  ret <2 x double> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x16]
; CHECK-NEXT:    # ymm0 = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; X86-NEXT:    # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; X64-NEXT:    # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_maskz_vpermil_ps_256(<8 x float> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermil_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermil_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermil_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x16]
; CHECK-NEXT:    # xmm0 = xmm0[2,1,1,0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; X86-NEXT:    # xmm1 {%k1} = xmm0[2,1,1,0]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; X64-NEXT:    # xmm1 {%k1} = xmm0[2,1,1,0]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_maskz_vpermil_ps_128(<4 x float> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermil_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermil_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[2,1,1,0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
  ret <4 x float> %res
}

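; vpermpd/vpermq take four 2-bit selectors across the whole 256-bit register,
; so immediate 3 (0b00000011) yields [3,0,0,0]. Note the unmasked integer
; test also selects the FP-domain vpermpd.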
declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_perm_df_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xc0,0x03]
; CHECK-NEXT:    # ymm0 = ymm0[3,0,0,0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_maskz_perm_df_256(<4 x double> %x0, i32 %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_perm_df_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_perm_df_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
  ret <4 x double> %res
}

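; Integer variant of the same permute: the masked forms select vpermq, while
; the unmasked form is emitted as vpermpd.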
declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_perm_di_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xc0,0x03]
; CHECK-NEXT:    # ymm0 = ymm0[3,0,0,0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_perm_di_256(<4 x i64> %x0, i32 %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_perm_di_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_perm_di_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

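; Masked FP stores. Each test stores once with a live mask and once with an
; all-ones mask (i8 -1); the masked store keeps the EVEX {%k1} form, while the
; unmasked one folds to a plain store that compresses to the VEX encoding.
; Aligned variants use vmovapd/vmovaps, unaligned ones vmovupd/vmovups.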
declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x01]
; X86-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
; X64-NEXT:    vmovapd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)

define void@test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x01]
; X86-NEXT:    vmovapd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
; X64-NEXT:    vmovapd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x01]
; X86-NEXT:    vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
; X64-NEXT:    vmovupd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x01]
; X86-NEXT:    vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
; X64-NEXT:    vmovupd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)

define void@test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovaps %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x01]
; X86-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
; X64-NEXT:    vmovaps %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)

define void@test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovaps %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x01]
; X86-NEXT:    vmovaps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
; X64-NEXT:    vmovaps %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)

define void@test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovups %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x01]
; X86-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovups %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07]
; X64-NEXT:    vmovups %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)

define void@test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovups %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x01]
; X86-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovups %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07]
; X64-NEXT:    vmovups %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}

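; Masked unaligned integer stores: the element width selects vmovdqu64 or
; vmovdqu32 under {%k1}, and the unmasked call relaxes to plain vmovdqu.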
declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)

define void@test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)

define void@test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)

define void@test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)

define void@test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

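; Aligned integer stores follow the same pattern with vmovdqa64/vmovdqa32
; versus plain vmovdqa.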
declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)

define void@test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)

define void@test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)

define void@test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)

define void@test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

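; Masked loads. Each test performs an unmasked load, a merge-masked load into
; that result, and a zero-masked load, then adds the last two so every form
; stays live.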
define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovaps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x00]
; X86-NEXT:    vmovaps (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x08]
; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; X64-NEXT:    vmovaps (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f]
; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x float> %res2, %res1
  ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)

define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovups (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x00]
; X86-NEXT:    vmovups (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x08]
; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovups (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; X64-NEXT:    vmovups (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f]
; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x float> %res2, %res1
  ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)

define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovapd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovapd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x00]
; X86-NEXT:    vmovapd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x08]
; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovapd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovapd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; X64-NEXT:    vmovapd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f]
; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x double> %res2, %res1
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)

define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovupd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovupd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x00]
; X86-NEXT:    vmovupd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x08]
; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovupd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovupd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; X64-NEXT:    vmovupd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f]
; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x double> %res2, %res1
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)

define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovaps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x00]
; X86-NEXT:    vmovaps (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x08]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; X64-NEXT:    vmovaps (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x float> %res2, %res1
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)

define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovups (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x00]
; X86-NEXT:    vmovups (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x08]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovups (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; X64-NEXT:    vmovups (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x float> %res2, %res1
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)

define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovapd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovapd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x00]
; X86-NEXT:    vmovapd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x08]
; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovapd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovapd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; X64-NEXT:    vmovapd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f]
; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <2 x double> %res2, %res1
  ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)

define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovupd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovupd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x00]
; X86-NEXT:    vmovupd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x08]
; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovupd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovupd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; X64-NEXT:    vmovupd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f]
; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <2 x double> %res2, %res1
  ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)

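; Unaligned integer loads, with a second pointer so the merge-masked load reads
; a different address than the unmasked and zero-masked ones.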
declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8)

define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x00]
; X86-NEXT:    vmovdqu32 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x09]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06]
; X64-NEXT:    vmovdqu32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
  %res4 = add <4 x i32> %res2, %res1
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8)

define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x00]
; X86-NEXT:    vmovdqu32 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x09]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06]
; X64-NEXT:    vmovdqu32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res2, %res1
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x00]
; X86-NEXT:    vmovdqu64 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x09]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06]
; X64-NEXT:    vmovdqu64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x00]
; X86-NEXT:    vmovdqu64 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x09]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06]
; X64-NEXT:    vmovdqu64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}

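; Aligned integer loads: vmovdqa32/vmovdqa64 with merge and zero masking,
; plain vmovdqa when unmasked.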
1853declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)
1854
1855define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
1856; X86-LABEL: test_mask_load_aligned_d_128:
1857; X86:       # %bb.0:
1858; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
1859; X86-NEXT:    vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
1860; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
1861; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
1862; X86-NEXT:    vmovdqa32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x00]
1863; X86-NEXT:    vmovdqa32 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x08]
1864; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1865; X86-NEXT:    retl # encoding: [0xc3]
1866;
1867; X64-LABEL: test_mask_load_aligned_d_128:
1868; X64:       # %bb.0:
1869; X64-NEXT:    vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
1870; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
1871; X64-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
1872; X64-NEXT:    vmovdqa32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f]
1873; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
1874; X64-NEXT:    retq # encoding: [0xc3]
1875  %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
1876  %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask)
1877  %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
1878  %res4 = add <4 x i32> %res2, %res1
1879  ret <4 x i32> %res4
1880}
1881
declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)

define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x00]
; X86-NEXT:    vmovdqa32 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x08]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; X64-NEXT:    vmovdqa32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res2, %res1
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x00]
; X86-NEXT:    vmovdqa64 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x08]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; X64-NEXT:    vmovdqa64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x00]
; X86-NEXT:    vmovdqa64 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x08]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; X64-NEXT:    vmovdqa64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}

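; pshuf.d tests: with an all-ones mask the shuffle compresses to a VEX
; vpermilps, while the merge- and zero-masked forms keep the EVEX vpshufd so
; the {%k1} write-mask can apply.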
declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps $3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x03]
; CHECK-NEXT:    # xmm0 = xmm0[3,0,0,0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
; X86-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
; X64-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_pshuf_d_128(<4 x i32> %x0, i32 %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshuf_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshuf_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pshuf_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps $3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x03]
; CHECK-NEXT:    # ymm0 = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_pshuf_d_256(<8 x i32> %x0, i32 %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pshuf_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pshuf_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

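; pcmpeq/pcmpgt tests: each compare writes a mask register that is copied to
; a GPR and truncated to i8. For 8-element compares the i8 mask argument can
; simply be ANDed into the result afterwards; narrower compares instead apply
; it as a {%k1} predicate on the compare itself.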
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)

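; FP unpckh/unpckl tests: unmasked forms compress to VEX vunpck* (vmovlhps
; for the low-pd 128-bit case), while masked forms stay EVEX so the result
; blends into %x2 under the %x3 write-mask.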
declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckh_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; CHECK-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_unpckh_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xc1]
; CHECK-NEXT:    # ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckh_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; CHECK-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckh_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xc1]
; CHECK-NEXT:    # ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckl_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; CHECK-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckl_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xc1]
; CHECK-NEXT:    # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckl_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
; CHECK-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_unpckl_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xc1]
; CHECK-NEXT:    # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

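; Integer punpckh/punpckl tests: the unmasked forms are emitted as the
; equivalent FP-domain unpacks, while the masked forms keep the EVEX integer
; instructions.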
declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhd_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
; CHECK-NEXT:    # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckld_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
; CHECK-NEXT:    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhd_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xc1]
; CHECK-NEXT:    # ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckld_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xc1]
; CHECK-NEXT:    # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhqd_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
; CHECK-NEXT:    # xmm0 = xmm0[1],xmm1[1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklqd_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
; CHECK-NEXT:    # xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpcklqd_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xc1]
; CHECK-NEXT:    # ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_punpckhqd_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xc1]
; CHECK-NEXT:    # ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

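; Masked pand.d tests, covering register (rr), memory (rm) and
; embedded-broadcast (rmb) operands with merge (k) and zeroing (kz) masking.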
define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

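; 256-bit masked AND tests for llvm.x86.avx512.mask.pand.d.256, covering the
; same rr/rm/rmb forms with merge (k) and zeroing (kz) masks on ymm registers.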
define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

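; 128-bit masked OR tests for llvm.x86.avx512.mask.por.d.128 (unmasked forms
; lower to VEX vorps; masked forms use EVEX vpord).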
define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

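; 256-bit masked OR tests for llvm.x86.avx512.mask.por.d.256.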
define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

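; 128-bit masked XOR tests for llvm.x86.avx512.mask.pxor.d.128 (unmasked forms
; lower to VEX vxorps; masked forms use EVEX vpxord).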
define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vxorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpxord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

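; 256-bit masked XOR tests for llvm.x86.avx512.mask.pxor.d.256.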
define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vxorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpxord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

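; 128-bit masked ANDN tests for llvm.x86.avx512.mask.pandn.d.128 (unmasked
; forms lower to VEX vandnps; masked forms use EVEX vpandnd).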
3873define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
3874; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
3875; CHECK:       # %bb.0:
3876; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
3877; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
3878  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
3879  ret <4 x i32> %res
3880}
3881
3882define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
3883; X86-LABEL: test_mask_andnot_epi32_rrk_128:
3884; X86:       # %bb.0:
3885; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
3886; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
3887; X86-NEXT:    vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
3888; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3889; X86-NEXT:    retl # encoding: [0xc3]
3890;
3891; X64-LABEL: test_mask_andnot_epi32_rrk_128:
3892; X64:       # %bb.0:
3893; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
3894; X64-NEXT:    vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
3895; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
3896; X64-NEXT:    retq # encoding: [0xc3]
3897  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
3898  ret <4 x i32> %res
3899}

define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandnd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
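
; A minimal equivalence sketch (kept in comments so it is not compiled with the
; tests): the masked pandn intrinsic computes ~a & b, then merges with the
; pass-through under the low bits of the i8 mask, i.e. roughly:
;   %nota = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
;   %andn = and <4 x i32> %nota, %b
;   %m    = bitcast i8 %mask to <8 x i1>
;   %m4   = shufflevector <8 x i1> %m, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %res  = select <4 x i1> %m4, <4 x i32> %andn, <4 x i32> %passThru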

define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandnd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi64_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi64_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandnq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi64_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi64_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandnq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi64_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi64_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

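; The padd/psub tests below reuse the same suffix scheme. A masked add is
; roughly plain IR 'add' followed by a mask select (sketch, analogous to the
; pandn equivalence above):
;   %sum = add <4 x i32> %a, %b
;   %res = select <4 x i1> %m4, <4 x i32> %sum, <4 x i32> %passThru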
4533define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
4534; CHECK-LABEL: test_mask_add_epi32_rr_128:
4535; CHECK:       # %bb.0:
4536; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
4537; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4538  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4539  ret <4 x i32> %res
4540}
4541
4542define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
4543; X86-LABEL: test_mask_add_epi32_rrk_128:
4544; X86:       # %bb.0:
4545; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4546; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4547; X86-NEXT:    vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
4548; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4549; X86-NEXT:    retl # encoding: [0xc3]
4550;
4551; X64-LABEL: test_mask_add_epi32_rrk_128:
4552; X64:       # %bb.0:
4553; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4554; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
4555; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4556; X64-NEXT:    retq # encoding: [0xc3]
4557  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4558  ret <4 x i32> %res
4559}
4560
4561define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
4562; X86-LABEL: test_mask_add_epi32_rrkz_128:
4563; X86:       # %bb.0:
4564; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4565; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4566; X86-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
4567; X86-NEXT:    retl # encoding: [0xc3]
4568;
4569; X64-LABEL: test_mask_add_epi32_rrkz_128:
4570; X64:       # %bb.0:
4571; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4572; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
4573; X64-NEXT:    retq # encoding: [0xc3]
4574  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4575  ret <4 x i32> %res
4576}
4577
4578define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
4579; X86-LABEL: test_mask_add_epi32_rm_128:
4580; X86:       # %bb.0:
4581; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4582; X86-NEXT:    vpaddd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x00]
4583; X86-NEXT:    retl # encoding: [0xc3]
4584;
4585; X64-LABEL: test_mask_add_epi32_rm_128:
4586; X64:       # %bb.0:
4587; X64-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07]
4588; X64-NEXT:    retq # encoding: [0xc3]
4589  %b = load <4 x i32>, <4 x i32>* %ptr_b
4590  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4591  ret <4 x i32> %res
4592}
4593
4594define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4595; X86-LABEL: test_mask_add_epi32_rmk_128:
4596; X86:       # %bb.0:
4597; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4598; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4599; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4600; X86-NEXT:    vpaddd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x08]
4601; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4602; X86-NEXT:    retl # encoding: [0xc3]
4603;
4604; X64-LABEL: test_mask_add_epi32_rmk_128:
4605; X64:       # %bb.0:
4606; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4607; X64-NEXT:    vpaddd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
4608; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4609; X64-NEXT:    retq # encoding: [0xc3]
4610  %b = load <4 x i32>, <4 x i32>* %ptr_b
4611  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4612  ret <4 x i32> %res
4613}
4614
4615define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
4616; X86-LABEL: test_mask_add_epi32_rmkz_128:
4617; X86:       # %bb.0:
4618; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4619; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4620; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4621; X86-NEXT:    vpaddd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x00]
4622; X86-NEXT:    retl # encoding: [0xc3]
4623;
4624; X64-LABEL: test_mask_add_epi32_rmkz_128:
4625; X64:       # %bb.0:
4626; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4627; X64-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
4628; X64-NEXT:    retq # encoding: [0xc3]
4629  %b = load <4 x i32>, <4 x i32>* %ptr_b
4630  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4631  ret <4 x i32> %res
4632}
4633
4634define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
4635; X86-LABEL: test_mask_add_epi32_rmb_128:
4636; X86:       # %bb.0:
4637; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4638; X86-NEXT:    vpaddd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x00]
4639; X86-NEXT:    retl # encoding: [0xc3]
4640;
4641; X64-LABEL: test_mask_add_epi32_rmb_128:
4642; X64:       # %bb.0:
4643; X64-NEXT:    vpaddd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
4644; X64-NEXT:    retq # encoding: [0xc3]
4645  %q = load i32, i32* %ptr_b
4646  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4647  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4648  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4649  ret <4 x i32> %res
4650}
4651
4652define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4653; X86-LABEL: test_mask_add_epi32_rmbk_128:
4654; X86:       # %bb.0:
4655; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4656; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4657; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4658; X86-NEXT:    vpaddd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x08]
4659; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4660; X86-NEXT:    retl # encoding: [0xc3]
4661;
4662; X64-LABEL: test_mask_add_epi32_rmbk_128:
4663; X64:       # %bb.0:
4664; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4665; X64-NEXT:    vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
4666; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4667; X64-NEXT:    retq # encoding: [0xc3]
4668  %q = load i32, i32* %ptr_b
4669  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4670  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4671  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4672  ret <4 x i32> %res
4673}
4674
4675define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
4676; X86-LABEL: test_mask_add_epi32_rmbkz_128:
4677; X86:       # %bb.0:
4678; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4679; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4680; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4681; X86-NEXT:    vpaddd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x00]
4682; X86-NEXT:    retl # encoding: [0xc3]
4683;
4684; X64-LABEL: test_mask_add_epi32_rmbkz_128:
4685; X64:       # %bb.0:
4686; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4687; X64-NEXT:    vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
4688; X64-NEXT:    retq # encoding: [0xc3]
4689  %q = load i32, i32* %ptr_b
4690  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4691  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4692  %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4693  ret <4 x i32> %res
4694}
4695
4696declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
4697
4698define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
4699; CHECK-LABEL: test_mask_sub_epi32_rr_128:
4700; CHECK:       # %bb.0:
4701; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
4702; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4703  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4704  ret <4 x i32> %res
4705}
4706
4707define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
4708; X86-LABEL: test_mask_sub_epi32_rrk_128:
4709; X86:       # %bb.0:
4710; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4711; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4712; X86-NEXT:    vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
4713; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4714; X86-NEXT:    retl # encoding: [0xc3]
4715;
4716; X64-LABEL: test_mask_sub_epi32_rrk_128:
4717; X64:       # %bb.0:
4718; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4719; X64-NEXT:    vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
4720; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
4721; X64-NEXT:    retq # encoding: [0xc3]
4722  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4723  ret <4 x i32> %res
4724}
4725
4726define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
4727; X86-LABEL: test_mask_sub_epi32_rrkz_128:
4728; X86:       # %bb.0:
4729; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4730; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4731; X86-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
4732; X86-NEXT:    retl # encoding: [0xc3]
4733;
4734; X64-LABEL: test_mask_sub_epi32_rrkz_128:
4735; X64:       # %bb.0:
4736; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4737; X64-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
4738; X64-NEXT:    retq # encoding: [0xc3]
4739  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4740  ret <4 x i32> %res
4741}
4742
4743define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
4744; X86-LABEL: test_mask_sub_epi32_rm_128:
4745; X86:       # %bb.0:
4746; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4747; X86-NEXT:    vpsubd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x00]
4748; X86-NEXT:    retl # encoding: [0xc3]
4749;
4750; X64-LABEL: test_mask_sub_epi32_rm_128:
4751; X64:       # %bb.0:
4752; X64-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x07]
4753; X64-NEXT:    retq # encoding: [0xc3]
4754  %b = load <4 x i32>, <4 x i32>* %ptr_b
4755  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4756  ret <4 x i32> %res
4757}
4758
4759define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4760; X86-LABEL: test_mask_sub_epi32_rmk_128:
4761; X86:       # %bb.0:
4762; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4763; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4764; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4765; X86-NEXT:    vpsubd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x08]
4766; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4767; X86-NEXT:    retl # encoding: [0xc3]
4768;
4769; X64-LABEL: test_mask_sub_epi32_rmk_128:
4770; X64:       # %bb.0:
4771; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4772; X64-NEXT:    vpsubd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
4773; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4774; X64-NEXT:    retq # encoding: [0xc3]
4775  %b = load <4 x i32>, <4 x i32>* %ptr_b
4776  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4777  ret <4 x i32> %res
4778}
4779
4780define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
4781; X86-LABEL: test_mask_sub_epi32_rmkz_128:
4782; X86:       # %bb.0:
4783; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4784; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4785; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4786; X86-NEXT:    vpsubd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x00]
4787; X86-NEXT:    retl # encoding: [0xc3]
4788;
4789; X64-LABEL: test_mask_sub_epi32_rmkz_128:
4790; X64:       # %bb.0:
4791; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4792; X64-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
4793; X64-NEXT:    retq # encoding: [0xc3]
4794  %b = load <4 x i32>, <4 x i32>* %ptr_b
4795  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4796  ret <4 x i32> %res
4797}
4798
4799define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
4800; X86-LABEL: test_mask_sub_epi32_rmb_128:
4801; X86:       # %bb.0:
4802; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4803; X86-NEXT:    vpsubd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x00]
4804; X86-NEXT:    retl # encoding: [0xc3]
4805;
4806; X64-LABEL: test_mask_sub_epi32_rmb_128:
4807; X64:       # %bb.0:
4808; X64-NEXT:    vpsubd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
4809; X64-NEXT:    retq # encoding: [0xc3]
4810  %q = load i32, i32* %ptr_b
4811  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4812  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4813  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
4814  ret <4 x i32> %res
4815}
4816
4817define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
4818; X86-LABEL: test_mask_sub_epi32_rmbk_128:
4819; X86:       # %bb.0:
4820; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4821; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4822; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4823; X86-NEXT:    vpsubd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x08]
4824; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4825; X86-NEXT:    retl # encoding: [0xc3]
4826;
4827; X64-LABEL: test_mask_sub_epi32_rmbk_128:
4828; X64:       # %bb.0:
4829; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4830; X64-NEXT:    vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
4831; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
4832; X64-NEXT:    retq # encoding: [0xc3]
4833  %q = load i32, i32* %ptr_b
4834  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4835  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4836  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
4837  ret <4 x i32> %res
4838}
4839
4840define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
4841; X86-LABEL: test_mask_sub_epi32_rmbkz_128:
4842; X86:       # %bb.0:
4843; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4844; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4845; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4846; X86-NEXT:    vpsubd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x00]
4847; X86-NEXT:    retl # encoding: [0xc3]
4848;
4849; X64-LABEL: test_mask_sub_epi32_rmbkz_128:
4850; X64:       # %bb.0:
4851; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4852; X64-NEXT:    vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
4853; X64-NEXT:    retq # encoding: [0xc3]
4854  %q = load i32, i32* %ptr_b
4855  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
4856  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
4857  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
4858  ret <4 x i32> %res
4859}
4860
4861declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
4862
4863define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
4864; CHECK-LABEL: test_mask_sub_epi32_rr_256:
4865; CHECK:       # %bb.0:
4866; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
4867; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
4868  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4869  ret <8 x i32> %res
4870}
4871
4872define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
4873; X86-LABEL: test_mask_sub_epi32_rrk_256:
4874; X86:       # %bb.0:
4875; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4876; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4877; X86-NEXT:    vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
4878; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4879; X86-NEXT:    retl # encoding: [0xc3]
4880;
4881; X64-LABEL: test_mask_sub_epi32_rrk_256:
4882; X64:       # %bb.0:
4883; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4884; X64-NEXT:    vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
4885; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
4886; X64-NEXT:    retq # encoding: [0xc3]
4887  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4888  ret <8 x i32> %res
4889}
4890
4891define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
4892; X86-LABEL: test_mask_sub_epi32_rrkz_256:
4893; X86:       # %bb.0:
4894; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
4895; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
4896; X86-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
4897; X86-NEXT:    retl # encoding: [0xc3]
4898;
4899; X64-LABEL: test_mask_sub_epi32_rrkz_256:
4900; X64:       # %bb.0:
4901; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
4902; X64-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
4903; X64-NEXT:    retq # encoding: [0xc3]
4904  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4905  ret <8 x i32> %res
4906}
4907
4908define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
4909; X86-LABEL: test_mask_sub_epi32_rm_256:
4910; X86:       # %bb.0:
4911; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4912; X86-NEXT:    vpsubd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x00]
4913; X86-NEXT:    retl # encoding: [0xc3]
4914;
4915; X64-LABEL: test_mask_sub_epi32_rm_256:
4916; X64:       # %bb.0:
4917; X64-NEXT:    vpsubd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x07]
4918; X64-NEXT:    retq # encoding: [0xc3]
4919  %b = load <8 x i32>, <8 x i32>* %ptr_b
4920  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
4921  ret <8 x i32> %res
4922}
4923
4924define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
4925; X86-LABEL: test_mask_sub_epi32_rmk_256:
4926; X86:       # %bb.0:
4927; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4928; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4929; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4930; X86-NEXT:    vpsubd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x08]
4931; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4932; X86-NEXT:    retl # encoding: [0xc3]
4933;
4934; X64-LABEL: test_mask_sub_epi32_rmk_256:
4935; X64:       # %bb.0:
4936; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4937; X64-NEXT:    vpsubd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
4938; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
4939; X64-NEXT:    retq # encoding: [0xc3]
4940  %b = load <8 x i32>, <8 x i32>* %ptr_b
4941  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
4942  ret <8 x i32> %res
4943}
4944
4945define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
4946; X86-LABEL: test_mask_sub_epi32_rmkz_256:
4947; X86:       # %bb.0:
4948; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
4949; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
4950; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
4951; X86-NEXT:    vpsubd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x00]
4952; X86-NEXT:    retl # encoding: [0xc3]
4953;
4954; X64-LABEL: test_mask_sub_epi32_rmkz_256:
4955; X64:       # %bb.0:
4956; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
4957; X64-NEXT:    vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
4958; X64-NEXT:    retq # encoding: [0xc3]
4959  %b = load <8 x i32>, <8 x i32>* %ptr_b
4960  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
4961  ret <8 x i32> %res
4962}
4963
define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_sub_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsubd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsubd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_add_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_add_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpaddd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpaddd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpaddd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpaddd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_add_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpaddd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpaddd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpaddd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpaddd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

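; The floating-point tests below exercise the same masking forms for
; vaddps/vsubps/vmulps/vdivps: zero-masking ({z}) writes the result register
; directly, while merge-masking computes into a scratch register under %k1
; and then copies the blended value back to xmm0/ymm0.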
define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_add_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_add_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_add_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_add_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_add_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_add_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_sub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_sub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_sub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_sub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_sub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_sub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_sub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_sub_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_mul_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_mul_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_mul_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_mul_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_mul_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_mul_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_mul_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_mul_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_mul_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_mul_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_div_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_div_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_div_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5e,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_div_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_div_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_div_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

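; For the 256-bit x4 shuffles each source has only two 128-bit lanes, so each
; result lane is picked by a single immediate bit: bit 0 selects the lane of
; %x0 and bit 1 the lane of %x1. imm = 22 (0b10110) thus takes lane 0 of %x0
; and lane 1 of %x1, which lowers to a plain vblendps $240 / vblendpd $12.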
declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_f32x4_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; CHECK-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; X86-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vblendmps %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x65,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; X64-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vblendmps %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x65,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_maskz_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_shuf_f32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; X86-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_shuf_f32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; X64-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
  ret <8 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_shuf_f64x2_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; CHECK-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
; X86:       # %bb.0:
; X86-NEXT:    vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X86-NEXT:    # ymm0 = ymm0[0,1],ymm1[2,3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vblendmpd %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x65,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
; X64:       # %bb.0:
; X64-NEXT:    vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X64-NEXT:    # ymm0 = ymm0[0,1],ymm1[2,3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vblendmpd %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x65,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_maskz_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_shuf_f64x2_256:
; X86:       # %bb.0:
; X86-NEXT:    vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X86-NEXT:    # ymm0 = ymm0[0,1],ymm1[2,3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_shuf_f64x2_256:
; X64:       # %bb.0:
; X64-NEXT:    vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X64-NEXT:    # ymm0 = ymm0[0,1],ymm1[2,3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
  ret <4 x double> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_shuf_i32x4_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; CHECK-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X86-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x64,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X64-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpblendmd %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x64,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
  ret <8 x i32> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_i64x2_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; CHECK-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
; X86:       # %bb.0:
; X86-NEXT:    vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X86-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpblendmq %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x64,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
; X64:       # %bb.0:
; X64-NEXT:    vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X64-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpblendmq %ymm0, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x64,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
  ret <4 x i64> %res
}

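; vshufpd selects one double per result slot: immediate bit i picks the low
; or high element of the corresponding source for slot i. $1 yields
; xmm0[1],xmm1[0]; on the 256-bit form below, $6 yields
; ymm0[0],ymm1[1],ymm0[3],ymm1[2].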
declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_shuf_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01]
; CHECK-NEXT:    # xmm0 = xmm0[1],xmm1[0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
; X86-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[0]
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
; X64-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[0]
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 %x4)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_maskz_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_shuf_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_shuf_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> zeroinitializer, i8 %x4)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufpd $6, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xc1,0x06]
; CHECK-NEXT:    # ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 %x4)
  ret <4 x double> %res
}

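; vshufps uses four 2-bit selectors: the low two pick elements of the first
; source, the high two pick elements of the second. imm = 22 (0b00010110)
; selects xmm0[2,1],xmm1[1,0]; the 256-bit form repeats that pattern in each
; 128-bit lane.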
declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xc1,0x16]
; CHECK-NEXT:    # xmm0 = xmm0[2,1],xmm1[1,0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
; X86-NEXT:    # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
; X64-NEXT:    # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_shuf_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xc1,0x16]
; CHECK-NEXT:    # ymm0 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
; X86-NEXT:    # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
; X64-NEXT:    # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
  ret <8 x float> %res
}

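; The remaining tests cover masked signed maximum. vpmaxsd has a VEX form, so
; unmasked cases compress EVEX to VEX; vpmaxsq exists only under AVX-512 and
; keeps its EVEX encoding even when unmasked.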
5886declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
5887
5888define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
5889; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
5890; X86:       # %bb.0:
5891; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
5892; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
5893; X86-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
5894; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5895; X86-NEXT:    retl # encoding: [0xc3]
5896;
5897; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
5898; X64:       # %bb.0:
5899; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
5900; X64-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
5901; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
5902; X64-NEXT:    retq # encoding: [0xc3]
5903  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2 ,i8 %mask)
5904  ret <4 x i32> %res
5905}

define <4 x i32>@test_int_x86_avx512_maskz_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxs_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxs_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pmaxs_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmaxs_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

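; Unsigned max tests: llvm.x86.avx512.mask.pmaxu.* selects vpmaxud/vpmaxuq, exercising merge-masked, zero-masked, and unmasked forms.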
declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxu_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmaxu_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pmaxu_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmaxu_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

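; Signed min tests: llvm.x86.avx512.mask.pmins.* selects vpminsd/vpminsq.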
declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pmins_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmins_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmins_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmins_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminsq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pmins_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmins_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pmins_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmins_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

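; Unsigned min tests: llvm.x86.avx512.mask.pminu.* selects vpminud/vpminuq.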
declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pminu_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pminu_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pminu_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminud %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pminu_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpminuq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pminu_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pminu_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_pminu_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pminu_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

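; Logical right shifts by a vector count: llvm.x86.avx512.mask.psrl.{q,d}.* selects vpsrlq/vpsrld; the 256-bit forms still take a 128-bit count operand.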
declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrl_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrl_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrl_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrl_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrl_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrl_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrl_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrl_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrl_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrl_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrl_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrl_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

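; Arithmetic right shifts by a vector count: llvm.x86.avx512.mask.psra.d.* selects vpsrad.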
declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psra_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psra_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psra_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psra_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psra_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psra_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

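; Left shifts by a vector count: llvm.x86.avx512.mask.psll.{d,q}.* selects vpslld/vpsllq.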
declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psll_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psll_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psll_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psll_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psll_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psll_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psll_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psll_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psll_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

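; Immediate-count logical right shifts: llvm.x86.avx512.mask.psrl.{qi,di}.* selects vpsrlq/vpsrld with an immediate. Each test runs the intrinsic merge-masked, unmasked, and zero-masked, then adds the three results.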
declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
; X86-NEXT:    vpsrlq $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x04]
; X86-NEXT:    vpsrlq $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x05]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
; X64-NEXT:    vpsrlq $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x04]
; X64-NEXT:    vpsrlq $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x05]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 4, <2 x i64> %x2, i8 -1)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 5, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
; X86-NEXT:    vpsrlq $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x04]
; X86-NEXT:    vpsrlq $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x05]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
; X64-NEXT:    vpsrlq $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x04]
; X64-NEXT:    vpsrlq $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x05]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 4, <4 x i64> %x2, i8 -1)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 5, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
; X86-NEXT:    vpsrld $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x04]
; X86-NEXT:    vpsrld $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x05]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
; X64-NEXT:    vpsrld $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x04]
; X64-NEXT:    vpsrld $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x05]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 4, <4 x i32> %x2, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 5, <4 x i32> zeroinitializer, i8 %x3)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
; X86-NEXT:    vpsrld $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x04]
; X86-NEXT:    vpsrld $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x05]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
; X64-NEXT:    vpsrld $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x04]
; X64-NEXT:    vpsrld $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x05]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 4, <8 x i32> %x2, i8 -1)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 5, <8 x i32> zeroinitializer, i8 %x3)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

6977declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i32, <4 x i32>, i8)
6978
6979define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
6980; X86-LABEL: test_int_x86_avx512_mask_psll_di_128:
6981; X86:       # %bb.0:
6982; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
6983; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
6984; X86-NEXT:    vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
6985; X86-NEXT:    vpslld $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xf0,0x04]
6986; X86-NEXT:    vpslld $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x05]
6987; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6988; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6989; X86-NEXT:    retl # encoding: [0xc3]
6990;
6991; X64-LABEL: test_int_x86_avx512_mask_psll_di_128:
6992; X64:       # %bb.0:
6993; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
6994; X64-NEXT:    vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
6995; X64-NEXT:    vpslld $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xf0,0x04]
6996; X64-NEXT:    vpslld $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x05]
6997; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
6998; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
6999; X64-NEXT:    retq # encoding: [0xc3]
7000  %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
7001  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 4, <4 x i32> zeroinitializer, i8 %x3)
7002  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 5, <4 x i32> %x2, i8 -1)
7003  %res3 = add <4 x i32> %res, %res1
7004  %res4 = add <4 x i32> %res3, %res2
7005  ret <4 x i32> %res4
7006}
7007
7008declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i32, <8 x i32>, i8)
7009
7010define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
7011; X86-LABEL: test_int_x86_avx512_mask_psll_di_256:
7012; X86:       # %bb.0:
7013; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
7014; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7015; X86-NEXT:    vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
7016; X86-NEXT:    vpslld $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xf0,0x04]
7017; X86-NEXT:    vpslld $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x05]
7018; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
7019; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7020; X86-NEXT:    retl # encoding: [0xc3]
7021;
7022; X64-LABEL: test_int_x86_avx512_mask_psll_di_256:
7023; X64:       # %bb.0:
7024; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
7025; X64-NEXT:    vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
7026; X64-NEXT:    vpslld $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xf0,0x04]
7027; X64-NEXT:    vpslld $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x05]
7028; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
7029; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
7030; X64-NEXT:    retq # encoding: [0xc3]
7031  %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
7032  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 4, <8 x i32> zeroinitializer, i8 %x3)
7033  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 5, <8 x i32> %x2, i8 -1)
7034  %res3 = add <8 x i32> %res, %res1
7035  %res4 = add <8 x i32> %res3, %res2
7036  ret <8 x i32> %res4
7037}
7038
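; The tests below switch to the variable-shift intrinsics: VPSRLVQ/VPSRLVD
; shift each element right (logically) by the count in the corresponding
; element of the second operand. Each width gets an unmasked, a merge-masked,
; and a zero-masked test.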
declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrlv2_di:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv2_di:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv2_di:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv2_di:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv2_di:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrlv4_di:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv4_di:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv4_di:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv4_di:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv4_di:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrlv4_si:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv4_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv4_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv4_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv4_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrlv8_si:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv8_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv8_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrlv8_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrlv8_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

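; VPSRAVD variants: per-element arithmetic (sign-filling) right shifts, with
; the same unmasked/merge-masked/zero-masked coverage as above.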
declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrav4_si:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav4_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav4_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav4_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav4_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrav8_si:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav8_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav8_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

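; Constant-operand variant: both the source vector and the shift counts are
; build-vector constants, so the expected code loads one constant into ymm0
; and folds the other into the memory operand of vpsravd.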
define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

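; VPSLLVQ/VPSLLVD variants: per-element logical left shifts, mirroring the
; right-shift coverage above.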
declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psllv2_di:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv2_di:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv2_di:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv2_di:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv2_di:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psllv4_di:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv4_di:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv4_di:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv4_di:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv4_di:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psllv4_si:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv4_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv4_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv4_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv4_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psllv8_si:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psllv8_si:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psllv8_si:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

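; Masked zero-extension tests (VPMOVZXB*/VPMOVZXD*/VPMOVZXW*). The shuffle
; decode comments emitted with each instruction (e.g. xmm0[0],zero,zero,zero)
; show how each narrow source lane expands into the wider destination element.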
declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
; CHECK-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_pmovzxb_d_128(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xc0]
; CHECK-NEXT:    # ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_pmovzxb_d_256(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
; CHECK-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_pmovzxb_q_128(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxb_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xc0]
; CHECK-NEXT:    # ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmovzxb_q_256(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxb_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxd_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
; CHECK-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_pmovzxd_q_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxd_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxdq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xc0]
; CHECK-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmovzxd_q_256(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxw_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
; CHECK-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}


define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}


define <4 x i32>@test_int_x86_avx512_maskz_pmovzxw_d_128(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovzxw_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc0]
; CHECK-NEXT:    # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_pmovzxw_d_256(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
7936; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
7937; X86-NEXT:    retl # encoding: [0xc3]
7938;
7939; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_d_256:
7940; X64:       # %bb.0:
7941; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7942; X64-NEXT:    vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
7943; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
7944; X64-NEXT:    retq # encoding: [0xc3]
7945  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
7946  ret <8 x i32> %res
7947}
7948
7949declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)
7950
7951define <2 x i64>@test_int_x86_avx512_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1) {
7952; CHECK-LABEL: test_int_x86_avx512_pmovzxw_q_128:
7953; CHECK:       # %bb.0:
7954; CHECK-NEXT:    vpmovzxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
7955; CHECK-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
7956; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
7957  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
7958  ret <2 x i64> %res
7959}
7960
7961define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
7962; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
7963; X86:       # %bb.0:
7964; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7965; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7966; X86-NEXT:    vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
7967; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
7968; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7969; X86-NEXT:    retl # encoding: [0xc3]
7970;
7971; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
7972; X64:       # %bb.0:
7973; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7974; X64-NEXT:    vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
7975; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
7976; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
7977; X64-NEXT:    retq # encoding: [0xc3]
7978  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
7979  ret <2 x i64> %res
7980}
7981
7982define <2 x i64>@test_int_x86_avx512_maskz_pmovzxw_q_128(<8 x i16> %x0, i8 %x2) {
7983; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_128:
7984; X86:       # %bb.0:
7985; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
7986; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
7987; X86-NEXT:    vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
7988; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
7989; X86-NEXT:    retl # encoding: [0xc3]
7990;
7991; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_128:
7992; X64:       # %bb.0:
7993; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7994; X64-NEXT:    vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
7995; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
7996; X64-NEXT:    retq # encoding: [0xc3]
7997  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
7998  ret <2 x i64> %res
7999}
8000
8001declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)
8002
8003define <4 x i64>@test_int_x86_avx512_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1) {
8004; CHECK-LABEL: test_int_x86_avx512_pmovzxw_q_256:
8005; CHECK:       # %bb.0:
8006; CHECK-NEXT:    vpmovzxwq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xc0]
8007; CHECK-NEXT:    # ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
8008; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8009  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
8010  ret <4 x i64> %res
8011}
8012
8013define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
8014; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
8015; X86:       # %bb.0:
8016; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8017; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8018; X86-NEXT:    vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
8019; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
8020; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8021; X86-NEXT:    retl # encoding: [0xc3]
8022;
8023; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
8024; X64:       # %bb.0:
8025; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8026; X64-NEXT:    vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
8027; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
8028; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8029; X64-NEXT:    retq # encoding: [0xc3]
8030  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
8031  ret <4 x i64> %res
8032}
8033
8034define <4 x i64>@test_int_x86_avx512_maskz_pmovzxw_q_256(<8 x i16> %x0, i8 %x2) {
8035; X86-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_256:
8036; X86:       # %bb.0:
8037; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8038; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8039; X86-NEXT:    vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
8040; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
8041; X86-NEXT:    retl # encoding: [0xc3]
8042;
8043; X64-LABEL: test_int_x86_avx512_maskz_pmovzxw_q_256:
8044; X64:       # %bb.0:
8045; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8046; X64-NEXT:    vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
8047; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
8048; X64-NEXT:    retq # encoding: [0xc3]
8049  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
8050  ret <4 x i64> %res
8051}
8052
8053declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)
8054
8055define <4 x i32>@test_int_x86_avx512_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1) {
8056; CHECK-LABEL: test_int_x86_avx512_pmovsxb_d_128:
8057; CHECK:       # %bb.0:
8058; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
8059; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8060  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
8061  ret <4 x i32> %res
8062}
8063
8064define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
8065; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
8066; X86:       # %bb.0:
8067; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8068; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8069; X86-NEXT:    vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
8070; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8071; X86-NEXT:    retl # encoding: [0xc3]
8072;
8073; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
8074; X64:       # %bb.0:
8075; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8076; X64-NEXT:    vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
8077; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
8078; X64-NEXT:    retq # encoding: [0xc3]
8079  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
8080  ret <4 x i32> %res
8081}
8082
8083define <4 x i32>@test_int_x86_avx512_maskz_pmovsxb_d_128(<16 x i8> %x0, i8 %x2) {
8084; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_128:
8085; X86:       # %bb.0:
8086; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8087; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8088; X86-NEXT:    vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
8089; X86-NEXT:    retl # encoding: [0xc3]
8090;
8091; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_128:
8092; X64:       # %bb.0:
8093; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8094; X64-NEXT:    vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
8095; X64-NEXT:    retq # encoding: [0xc3]
8096  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
8097  ret <4 x i32> %res
8098}
8099
8100declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)
8101
8102define <8 x i32>@test_int_x86_avx512_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1) {
8103; CHECK-LABEL: test_int_x86_avx512_pmovsxb_d_256:
8104; CHECK:       # %bb.0:
8105; CHECK-NEXT:    vpmovsxbd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xc0]
8106; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8107  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
8108  ret <8 x i32> %res
8109}
8110
8111define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
8112; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
8113; X86:       # %bb.0:
8114; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8115; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8116; X86-NEXT:    vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
8117; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8118; X86-NEXT:    retl # encoding: [0xc3]
8119;
8120; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
8121; X64:       # %bb.0:
8122; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8123; X64-NEXT:    vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
8124; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
8125; X64-NEXT:    retq # encoding: [0xc3]
8126  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
8127  ret <8 x i32> %res
8128}
8129
8130define <8 x i32>@test_int_x86_avx512_maskz_pmovsxb_d_256(<16 x i8> %x0, i8 %x2) {
8131; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_256:
8132; X86:       # %bb.0:
8133; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8134; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8135; X86-NEXT:    vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
8136; X86-NEXT:    retl # encoding: [0xc3]
8137;
8138; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_d_256:
8139; X64:       # %bb.0:
8140; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8141; X64-NEXT:    vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
8142; X64-NEXT:    retq # encoding: [0xc3]
8143  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
8144  ret <8 x i32> %res
8145}
8146
8147declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)
8148
define <2 x i64>@test_int_x86_avx512_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxb_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_pmovsxb_q_128(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxb_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmovsxb_q_256(<16 x i8> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxb_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxw_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_pmovsxw_d_128(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxw_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_pmovsxw_d_256(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxw_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_pmovsxw_q_128(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmovsxw_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwq %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_pmovsxw_q_256(<8 x i16> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmovsxw_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psra_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psra_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psra_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psra_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psra_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psra_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i32, <2 x i64>, i8)

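; The immediate-shift tests below exercise all three masking modes in one
; function: a merge-masked shift by 3, a zero-masked shift by 4, and an
; unmasked shift by 5, whose results are then summed with vpaddq.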
define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
; X86-NEXT:    vpsraq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xe0,0x04]
; X86-NEXT:    vpsraq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x05]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
; X64-NEXT:    vpsraq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xe0,0x04]
; X64-NEXT:    vpsraq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xe0,0x05]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 4, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 5, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
; X86-NEXT:    vpsraq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xe0,0x04]
; X86-NEXT:    vpsraq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x05]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
; X64-NEXT:    vpsraq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xe0,0x04]
; X64-NEXT:    vpsraq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xe0,0x05]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 4, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 5, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrav_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  ret <2 x i64> %res
}

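; Both intrinsic operands below are constants; the expected codegen
; materializes them from the constant pool (the LCPI fixups) and still issues
; vpsravq, rather than folding the shift down to a constant result.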
define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,0,4294967287,4294967295]
; X86-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    vpsravq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,18446744073709551607]
; X64-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsravq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_psrav_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_psrav_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_psrav_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_dq2pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_dq2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_udq2pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2pd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_udq2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)

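; With an all-ones mask, the 2-dword rotate below is emitted as a byte-level
; vpalignr: 2 dwords x 4 bytes gives the $8 byte shift in the CHECK line.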
define <4 x i32>@test_int_x86_avx512_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_valign_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpalignr $8, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xc1,0x08]
; CHECK-NEXT:    # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_valign_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
; X86-NEXT:    # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_valign_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
; X64-NEXT:    # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 %x4)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_valign_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_valign_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_valign_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> zeroinitializer, i8 %x4)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

8875define <8 x i32>@test_int_x86_avx512_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3) {
8876; CHECK-LABEL: test_int_x86_avx512_valign_d_256:
8877; CHECK:       # %bb.0:
8878; CHECK-NEXT:    valignq $3, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xc1,0x03]
8879; CHECK-NEXT:    # ymm0 = ymm1[3],ymm0[0,1,2]
8880; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
8881  %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 -1)
8882  ret <8 x i32> %res
8883}
8884
8885define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
8886; X86-LABEL: test_int_x86_avx512_mask_valign_d_256:
8887; X86:       # %bb.0:
8888; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
8889; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
8890; X86-NEXT:    valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
8891; X86-NEXT:    # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
8892; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8893; X86-NEXT:    retl # encoding: [0xc3]
8894;
8895; X64-LABEL: test_int_x86_avx512_mask_valign_d_256:
8896; X64:       # %bb.0:
8897; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
8898; X64-NEXT:    valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
8899; X64-NEXT:    # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
8900; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
8901; X64-NEXT:    retq # encoding: [0xc3]
8902  %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 %x4)
8903  ret <8 x i32> %res
8904}
8905
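; VALIGNQ variants: with an all-ones mask a one-qword shift is likewise
; emitted as an 8-byte vpalignr.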
declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_valign_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpalignr $8, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xc1,0x08]
; CHECK-NEXT:    # xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_valign_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
; X86-NEXT:    # xmm2 {%k1} = xmm1[1],xmm0[0]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_valign_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
; X64-NEXT:    # xmm2 {%k1} = xmm1[1],xmm0[0]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 %x4)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_valign_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    valignq $3, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xc1,0x03]
; CHECK-NEXT:    # ymm0 = ymm1[3],ymm0[0,1,2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_valign_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
; X86-NEXT:    # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_valign_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
; X64-NEXT:    # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 %x4)
  ret <4 x i64> %res
}

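; VPERMILPD/VPERMILPS with a variable control vector. Unmasked calls compress
; to the VEX encoding; the masked and zero-masked forms keep EVEX.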
declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermilvar_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
  ret <2 x double> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_maskz_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermilvar_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_maskz_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermilvar_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
  ret <4 x float> %res
}

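; VEXTRACTF32X4 from a 256-bit source: the unmasked extract compresses to
; vextractf128, so only the masked forms need the AVX512 opcode.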
declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vextractf32x4_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_maskz_vextractf32x4_256(<8 x float> %x0, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vextractf32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vextractf32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
  ret <4 x float> %res
}

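; VINSERTF32X4 into a 256-bit destination: the unmasked insert compresses to
; vinsertf128.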
declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_insertf32x4_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_maskz_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_insertf32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_insertf32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
  ret <8 x float> %res
}

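; VINSERTI32X4: the unmasked integer insert is also emitted as vinsertf128;
; without masking the FP-domain form is selected.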
declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_inserti32x4_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_inserti32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_inserti32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
  ret <8 x i32> %res
}

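; Legacy masked max/min intrinsics. The unmasked tests pass an all-ones mask
; and compress to the VEX encodings.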
define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_max_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_max_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_max_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_max_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_max_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_max_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_min_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_min_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_min_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_min_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_min_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_min_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

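; CMP/UCMP tests run all eight predicates and pack the resulting mask bytes
; into an <8 x i8>. Predicates 3 (false) and 7 (true) fold to constants, so
; lane 3 stays zero and lane 7 is filled with 255 (or with the input mask in
; the masked tests) instead of a compare.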
define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc0]
; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x02]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x04]
; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x05]
; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k4 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xe1]
; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xc0]
; X86-NEXT:    vpcmpled %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x02]
; X86-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
; X86-NEXT:    vpcmpnltd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x05]
; X86-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
; X86-NEXT:    kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT:    kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT:    kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpcmpgtd %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xc0]
; X64-NEXT:    vpcmpled %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd1,0x02]
; X64-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
; X64-NEXT:    vpcmpnltd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x05]
; X64-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    vpinsrb $7, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x01]
; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x02]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x04]
; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd9,0x05]
; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x06]
; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
; CHECK-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; CHECK-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x01]
; X86-NEXT:    vpcmpleud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x02]
; X86-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
; X86-NEXT:    vpcmpnltud %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x05]
; X86-NEXT:    vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
; X86-NEXT:    kmovw %k0, %edx # encoding: [0xc5,0xf8,0x93,0xd0]
; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc2,0x01]
; X86-NEXT:    kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT:    vpinsrb $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x02]
; X86-NEXT:    kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT:    vpinsrb $4, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x04]
; X86-NEXT:    kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT:    vpinsrb $5, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x05]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    vpinsrb $6, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x06]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpcmpltud %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc1,0x01]
; X64-NEXT:    vpcmpleud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x02]
; X64-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x04]
; X64-NEXT:    vpcmpnltud %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe1,0x05]
; X64-NEXT:    vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
; X64-NEXT:    vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; X64-NEXT:    vpinsrb $1, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x01]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    vpinsrb $7, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone

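; The qword forms compare only four lanes, so the all-true predicate packs
; the constant 15 (0b1111) rather than 255.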
define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT:    vpcmpgtq %ymm0, %ymm1, %k1 # encoding: [0x62,0xf2,0xf5,0x28,0x37,0xc8]
; CHECK-NEXT:    vpcmpleq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x05]
; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xe9]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

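; Masked variant: every compare runs under {%k1}, and predicate 7 (TRUE)
; reduces to the mask itself, so the kshiftlw/kshiftrw pair keeps only the
; low 4 bits of %k1 rather than issuing a compare.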
define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X86-NEXT:    vpcmpgtq %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x37,0xd0]
; X86-NEXT:    vpcmpleq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02]
; X86-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x05]
; X86-NEXT:    vpcmpgtq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xf1]
; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X64-NEXT:    vpcmpgtq %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x37,0xd0]
; X64-NEXT:    vpcmpleq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd9,0x02]
; X64-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x05]
; X64-NEXT:    vpcmpgtq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xf1]
; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone

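; Unsigned <4 x i64> compares: the strictly unsigned predicates LTU, LEU,
; NLTU and NLEU select the vpcmpuq immediate form (opcode 0x1e), while EQ
; and NEQ are sign-agnostic and reuse the signed encodings.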
define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT:    vpcmpltuq %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01]
; CHECK-NEXT:    vpcmpleuq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x05]
; CHECK-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x06]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

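; Masked unsigned variant; as in the signed test, TRUE under a mask is the
; mask itself, recovered via the kshift pair.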
define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X86-NEXT:    vpcmpltuq %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01]
; X86-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02]
; X86-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x05]
; X86-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x06]
; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X64-NEXT:    vpcmpltuq %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x01]
; X64-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x02]
; X64-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xe9,0x05]
; X64-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x06]
; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone

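; 128-bit dword compares. EQ and GT map onto the compact legacy-style
; vpcmpeqd/vpcmpgtd opcodes (0x76/0x66); no vzeroupper is needed since only
; xmm registers are touched.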
define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc8]
; CHECK-NEXT:    vpcmpled %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltd %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x05]
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xe9]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

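; Masked <4 x i32> compares: 4 significant mask lanes, so the TRUE result is
; recovered by shifting %k1 left then right by 12.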
define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; X86-NEXT:    vpcmpgtd %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x66,0xd0]
; X86-NEXT:    vpcmpled %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02]
; X86-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltd %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x05]
; X86-NEXT:    vpcmpgtd %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xf1]
; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; X64-NEXT:    vpcmpgtd %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x66,0xd0]
; X64-NEXT:    vpcmpled %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd9,0x02]
; X64-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltd %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x05]
; X64-NEXT:    vpcmpgtd %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xf1]
; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone

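; Unsigned <4 x i32> compares via the vpcmpud immediate form.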
define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT:    vpcmpltud %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01]
; CHECK-NEXT:    vpcmpleud %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltud %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x05]
; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x06]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

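; Masked unsigned <4 x i32> compares.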
define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; X86-NEXT:    vpcmpltud %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01]
; X86-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02]
; X86-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltud %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x05]
; X86-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x06]
; X86-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
; X86-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; X64-NEXT:    vpcmpltud %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x01]
; X64-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x02]
; X64-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltud %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xe9,0x05]
; X64-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x06]
; X64-NEXT:    kshiftlw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
; X64-NEXT:    kshiftrw $12, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone

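; 128-bit qword compares: only 2 lanes, so the unmasked TRUE predicate folds
; to the constant 3.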
define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x08,0x37,0xc8]
; CHECK-NEXT:    vpcmpleq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x05]
; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xe9]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

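; Masked <2 x i64> compares: 2 significant mask lanes, hence the kshift pair
; uses a count of 14 to isolate the low 2 bits of %k1.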
define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X86-NEXT:    vpcmpgtq %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x37,0xd0]
; X86-NEXT:    vpcmpleq %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02]
; X86-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltq %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x05]
; X86-NEXT:    vpcmpgtq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf1]
; X86-NEXT:    kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
; X86-NEXT:    kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X64-NEXT:    vpcmpgtq %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x37,0xd0]
; X64-NEXT:    vpcmpleq %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd9,0x02]
; X64-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltq %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x05]
; X64-NEXT:    vpcmpgtq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf1]
; X64-NEXT:    kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
; X64-NEXT:    kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone

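; Unsigned <2 x i64> compares via the vpcmpuq immediate form.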
define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01]
; CHECK-NEXT:    vpcmpleuq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x05]
; CHECK-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x06]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; CHECK-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; CHECK-NEXT:    movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

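; Masked unsigned <2 x i64> compares.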
define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X86-NEXT:    vpcmpltuq %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01]
; X86-NEXT:    vpcmpleuq %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02]
; X86-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe9,0x05]
; X86-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x06]
; X86-NEXT:    kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
; X86-NEXT:    kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X86-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X64-NEXT:    vpcmpltuq %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x01]
; X64-NEXT:    vpcmpleuq %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x02]
; X64-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xe9,0x05]
; X64-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k6 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x06]
; X64-NEXT:    kshiftlw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
; X64-NEXT:    kshiftrw $14, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; X64-NEXT:    vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone

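; broadcastf32x4: replicating a 128-bit lane across a ymm register lowers to
; inserting the source into its own upper half. Only the masked forms need
; the EVEX vinsertf32x4; the masked load form folds into vbroadcastf32x4.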
10448declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8)
10449
10450define <8 x float>@test_int_x86_avx512_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2) {
10451; CHECK-LABEL: test_int_x86_avx512_broadcastf32x4_256:
10452; CHECK:       # %bb.0:
10453; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
10454; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
10455; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_maskz_broadcastf32x4_256(<4 x float> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcastf32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcastf32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_broadcastf32x4_256_load(<4 x float>* %x0ptr, <8 x float> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vbroadcastf32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x00]
; X86-NEXT:    # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x07]
; X64-NEXT:    # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
  ret <8 x float> %res
}

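; The integer broadcasti32x4 tests mirror the float ones above: register
; operands lower to an insert of the low 128 bits, loads to vbroadcasti32x4.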
declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_broadcasti32x4_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_broadcasti32x4_256(<4 x i32> %x0, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_broadcasti32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_broadcasti32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_broadcasti32x4_256_load(<4 x i32>* %x0ptr, <8 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vbroadcasti32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x00]
; X86-NEXT:    # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x07]
; X64-NEXT:    # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x i32>, <4 x i32>* %x0ptr
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
  ret <8 x i32> %res
}

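; Masked absolute value: vpabsq/vpabsd at 128- and 256-bit widths, unmasked
; and with a merge-masked destination.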
declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpabsq %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpabsq %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpabsd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pabs_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpabsd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

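; vptestmd/vptestmq: each test adds the masked and unmasked (i8 -1) results
; so both forms of the intrinsic are exercised in one function.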
declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

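; vptestnmd/vptestnmq follow the same masked-plus-unmasked pattern as the
; vptestm tests above.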
declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    kandw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x41,0xc9]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

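; Legacy vcmpps/vcmppd mask intrinsics with immediate predicate 2 (LE),
; returning the comparison mask as an i8.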
define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_cmpps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8)

define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_cmpps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8)

define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: test_cmppd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmplepd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, i8)

define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_cmppd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, i8)

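; Signed 32x32->64 multiply (vpmuldq) via the legacy masked intrinsic:
; reg/reg (rr), reg/mem (rm), and broadcast (rmb) forms, each tested
; unmasked, merge-masked (k), and zero-masked (kz).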
define <2 x i64> @test_mask_mul_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmb_128(<4 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmuldq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmbk_128(<4 x i32> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x28,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmbkz_128(<4 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32>, <4 x i32>, <2 x i64>, i8)

define <4 x i64> @test_mask_mul_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmb_256(<8 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmuldq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmbk_256(<8 x i32> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x28,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmbkz_256(<8 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32>, <8 x i32>, <4 x i64>, i8)

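; The same rr/rm/rmb matrix, repeated for the unsigned multiply (vpmuludq).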
define <2 x i64> @test_mask_mul_epu32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epu32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rrk_128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmb_128(<4 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmuludq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmbk_128(<4 x i32> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmbkz_128(<4 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32>, <4 x i32>, <2 x i64>, i8)

define <4 x i64> @test_mask_mul_epu32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epu32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rrk_256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmb_256(<8 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmuludq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmbk_256(<8 x i32> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmbkz_256(<8 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32>, <8 x i32>, <4 x i64>, i8)

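; Signed doubleword to single-precision conversions (VCVTDQ2PS), unmasked and
; merge-masked.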
declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_dq2ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_dq2ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
  ret <8 x float> %res
}

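; Half-precision to single-precision conversions (VCVTPH2PS) with merge- and
; zero-masking.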
define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_x86_vcvtph2ps_128_rrk(<8 x i16> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_128_rrk:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_128_rrk:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtph2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x13,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> %a1, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_x86_vcvtph2ps_128_rrkz(<8 x i16> %a0, i8 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_128_rrkz:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_128_rrkz:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtph2ps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x13,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16> %a0, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vcvtph2ps.128(<8 x i16>, <4 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_vcvtph2ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2ps %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x13,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_x86_vcvtph2ps_256_rrk(<8 x i16> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_256_rrk:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_256_rrk:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtph2ps %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x13,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> %a1, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_x86_vcvtph2ps_256_rrkz(<8 x i16> %a0, i8 %mask) {
; X86-LABEL: test_x86_vcvtph2ps_256_rrkz:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vcvtph2ps_256_rrkz:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtph2ps %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x13,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16> %a0, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vcvtph2ps.256(<8 x i16>, <8 x float>, i8) nounwind readonly

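; Narrowing and widening floating-point conversions. The 256-bit narrowing
; forms end with VZEROUPPER because they use ymm registers but return only an
; xmm value.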
declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_pd2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_pd2ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2ps %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

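; Variable permutes: the index vector selects source lanes. Unmasked forms
; compress to the VEX vpermps/vpermd encodings where possible.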
declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_permvar_sf_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_maskz_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_sf_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_sf_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
  ret <8 x float> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_si_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_si_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_si_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_si_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_si_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_df_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_df_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_df_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_maskz_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_df_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_df_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
  ret <4 x double> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_di_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_di_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_di_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_di_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_di_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

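; VPTERNLOG evaluates an arbitrary three-input boolean function bitwise, with
; the immediate as the truth table; $33 (0x21) is the table used throughout
; these tests.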
declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)

define <4 x i32>@test_int_x86_avx512_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pternlog_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xc2,0x21]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)

define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)

define <8 x i32>@test_int_x86_avx512_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pternlog_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xc2,0x21]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)

define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)

define <2 x i64>@test_int_x86_avx512_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pternlog_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xc2,0x21]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)

define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)

define <4 x i64>@test_int_x86_avx512_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pternlog_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xc2,0x21]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)

define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
  ret <4 x i64> %res
}

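; Unsigned doubleword to single-precision conversions (VCVTUDQ2PS), an
; EVEX-only instruction, so no VEX compression applies.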
declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_udq2ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_udq2ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
  ret <8 x float> %res
}

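; Two-source variable permutes. For the unmasked forms the compiler may emit
; either vpermi2d or vpermt2d, whichever avoids a register copy, since the two
; differ only in which operand is overwritten.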
declare <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2d %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x76,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x76,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x76,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2d %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x76,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x76,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x76,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

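; The floating-point two-source permutes follow the same pattern as the
; integer ones, with the masked result copied out via vmovapd/vmovaps.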
declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
  ret <2 x double> %res
}

define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
; X86-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
; X64-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
; X86-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
; X64-NEXT:    vmovapd %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

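; A bitcast of the index operand should be looked through; the expected
; codegen matches the uncast version above.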
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %x1cast = bitcast <2 x i64> %x1 to <4 x i32>
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2q %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermi2q %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0xf5,0x08,0x76,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2q %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x76,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2q %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x76,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2q %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermi2q %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0xf5,0x28,0x76,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2q %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x76,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2q %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x76,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

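; Compress-store tests. The unmasked wrappers pass an all-ones mask, which is
; expected to be materialized with kxnorw rather than loaded as a constant.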
define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)

define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
; X86-LABEL: test_compress_store_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)

define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
; X86-LABEL: test_compress_store_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)

define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
; X86-LABEL: test_compress_store_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)

define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
; X86-LABEL: test_compress_store_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
  ret void
}

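; Expand-load tests: the merge-masking forms load into the passthru operand,
; while the maskz variants pass zeroinitializer and use {z} zero-masking.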
define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
  ret <2 x double> %res
}

define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)

define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
; X86-LABEL: test_expand_load_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
  ret <2 x double> %res
}

define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)

define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
; X86-LABEL: test_expand_load_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
  ret <4 x float> %res
}

define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)

define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
; X86-LABEL: test_expand_load_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
  ret <2 x i64> %res
}

define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)

define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
; X86-LABEL: test_expand_load_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
  ret <4 x i32> %res
}

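; 256-bit compress/expand tests. The store tests are expected to end in
; vzeroupper since no ymm value is live out; the expand loads return a ymm
; result and do not.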
13315define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
13316; X86-LABEL: test_mask_compress_store_pd_256:
13317; X86:       # %bb.0:
13318; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
13319; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
13320; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
13321; X86-NEXT:    vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
13322; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13323; X86-NEXT:    retl # encoding: [0xc3]
13324;
13325; X64-LABEL: test_mask_compress_store_pd_256:
13326; X64:       # %bb.0:
13327; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13328; X64-NEXT:    vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
13329; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13330; X64-NEXT:    retq # encoding: [0xc3]
13331  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
13332  ret void
13333}
13334
13335declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
13336
13337define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) {
13338; X86-LABEL: test_compress_store_pd_256:
13339; X86:       # %bb.0:
13340; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
13341; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
13342; X86-NEXT:    vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
13343; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13344; X86-NEXT:    retl # encoding: [0xc3]
13345;
13346; X64-LABEL: test_compress_store_pd_256:
13347; X64:       # %bb.0:
13348; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
13349; X64-NEXT:    vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
13350; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13351; X64-NEXT:    retq # encoding: [0xc3]
13352  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1)
13353  ret void
13354}
13355
13356define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
13357; X86-LABEL: test_mask_compress_store_ps_256:
13358; X86:       # %bb.0:
13359; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
13360; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
13361; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
13362; X86-NEXT:    vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
13363; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13364; X86-NEXT:    retl # encoding: [0xc3]
13365;
13366; X64-LABEL: test_mask_compress_store_ps_256:
13367; X64:       # %bb.0:
13368; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13369; X64-NEXT:    vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
13370; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13371; X64-NEXT:    retq # encoding: [0xc3]
13372  call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
13373  ret void
13374}
13375
13376declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
13377
13378define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) {
13379; X86-LABEL: test_compress_store_ps_256:
13380; X86:       # %bb.0:
13381; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
13382; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
13383; X86-NEXT:    vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
13384; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13385; X86-NEXT:    retl # encoding: [0xc3]
13386;
13387; X64-LABEL: test_compress_store_ps_256:
13388; X64:       # %bb.0:
13389; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
13390; X64-NEXT:    vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
13391; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13392; X64-NEXT:    retq # encoding: [0xc3]
13393  call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1)
13394  ret void
13395}
13396
13397define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
13398; X86-LABEL: test_mask_compress_store_q_256:
13399; X86:       # %bb.0:
13400; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
13401; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
13402; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
13403; X86-NEXT:    vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
13404; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13405; X86-NEXT:    retl # encoding: [0xc3]
13406;
13407; X64-LABEL: test_mask_compress_store_q_256:
13408; X64:       # %bb.0:
13409; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13410; X64-NEXT:    vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
13411; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13412; X64-NEXT:    retq # encoding: [0xc3]
13413  call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
13414  ret void
13415}
13416
13417declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
13418
13419define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) {
13420; X86-LABEL: test_compress_store_q_256:
13421; X86:       # %bb.0:
13422; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
13423; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
13424; X86-NEXT:    vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
13425; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13426; X86-NEXT:    retl # encoding: [0xc3]
13427;
13428; X64-LABEL: test_compress_store_q_256:
13429; X64:       # %bb.0:
13430; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
13431; X64-NEXT:    vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
13432; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13433; X64-NEXT:    retq # encoding: [0xc3]
13434  call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1)
13435  ret void
13436}
13437
13438define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
13439; X86-LABEL: test_mask_compress_store_d_256:
13440; X86:       # %bb.0:
13441; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
13442; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
13443; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
13444; X86-NEXT:    vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
13445; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13446; X86-NEXT:    retl # encoding: [0xc3]
13447;
13448; X64-LABEL: test_mask_compress_store_d_256:
13449; X64:       # %bb.0:
13450; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
13451; X64-NEXT:    vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
13452; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13453; X64-NEXT:    retq # encoding: [0xc3]
13454  call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
13455  ret void
13456}
13457
13458declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
13459
13460define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) {
13461; X86-LABEL: test_compress_store_d_256:
13462; X86:       # %bb.0:
13463; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
13464; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
13465; X86-NEXT:    vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
13466; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13467; X86-NEXT:    retl # encoding: [0xc3]
13468;
13469; X64-LABEL: test_compress_store_d_256:
13470; X64:       # %bb.0:
13471; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
13472; X64-NEXT:    vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
13473; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
13474; X64-NEXT:    retq # encoding: [0xc3]
13475  call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1)
13476  ret void
13477}

define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
  ret <4 x double> %res
}

define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)

define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) {
; X86-LABEL: test_expand_load_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1)
  ret <4 x double> %res
}
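
; Hand-written sketch (no CHECK lines; assumed equivalence): the masked
; expand-load tests above correspond to the generic @llvm.masked.expandload,
; with the low four bits of the i8 mask extracted to <4 x i1>. Illustrative
; only.
define <4 x double> @expand_load_pd_256_generic_sketch(i8* %addr, <4 x double> %data, i8 %mask) {
  %ptr = bitcast i8* %addr to double*
  ; Widen the i8 mask to <8 x i1>, then keep the four live lanes.
  %m8 = bitcast i8 %mask to <8 x i1>
  %m = shufflevector <8 x i1> %m8, <8 x i1> %m8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <4 x double> @llvm.masked.expandload.v4f64(double* %ptr, <4 x i1> %m, <4 x double> %data)
  ret <4 x double> %res
}
declare <4 x double> @llvm.masked.expandload.v4f64(double*, <4 x i1>, <4 x double>)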

define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)

define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) {
; X86-LABEL: test_expand_load_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1)
  ret <8 x float> %res
}

define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)

define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) {
; X86-LABEL: test_expand_load_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1)
  ret <4 x i64> %res
}

define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)

define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) {
; X86-LABEL: test_expand_load_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
  ret <8 x i32> %res
}
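
; Hand-written sketch (no CHECK lines): with an all-ones mask (i8 -1, as in
; the test_expand_load_*_256 functions above) the generic form takes a
; constant all-true predicate, which llc materializes with
; kxnorw %k0, %k0, %k1. Illustrative only.
define <8 x i32> @expand_load_d_256_alltrue_sketch(i8* %addr, <8 x i32> %data) {
  %ptr = bitcast i8* %addr to i32*
  %res = call <8 x i32> @llvm.masked.expandload.v8i32(i32* %ptr, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> %data)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.masked.expandload.v8i32(i32*, <8 x i1>, <8 x i32>)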

define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
; X86-LABEL: test_sqrt_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_sqrt_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
; X86-LABEL: test_sqrt_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_sqrt_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
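
; Hand-written sketch (no CHECK lines; assumed equivalence): the zero-masked
; sqrt tests above are expressible as the generic @llvm.sqrt followed by a
; select against zero, mirroring the vsqrtpd {%k1} {z} form checked above.
; Illustrative only.
define <4 x double> @sqrt_pd_256_generic_sketch(<4 x double> %a0, i8 %mask) {
  %s = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a0)
  ; Keep the low four mask bits and zero the lanes whose bit is clear.
  %m8 = bitcast i8 %mask to <8 x i1>
  %m = shufflevector <8 x i1> %m8, <8 x i1> %m8, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = select <4 x i1> %m, <4 x double> %s, <4 x double> zeroinitializer
  ret <4 x double> %res
}
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)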

declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_prorv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prorv_d_128_old:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_128_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_128_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_prorv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_128_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_128_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_prorv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prorv_d_256_old:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_256_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_256_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_prorv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_256_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_256_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_prorv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prorv_q_128_old:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_128_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_128_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_prorv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_128_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_128_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_prorv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prorv_q_256_old:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_256_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_256_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_prorv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_256_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_256_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}
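
; Hand-written sketch (no CHECK lines; assumed equivalence): a variable
; rotate-right is a funnel shift with both value operands equal, so the
; legacy prorv tests above reduce to @llvm.fshr (and prolv, further below,
; to @llvm.fshl). Illustrative only.
define <4 x i64> @prorv_q_256_funnel_sketch(<4 x i64> %x0, <4 x i64> %x1) {
  ; fshr(a, a, b) rotates each lane of a right by the corresponding lane of b.
  %res = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %x0, <4 x i64> %x0, <4 x i64> %x1)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)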

declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
; X86-NEXT:    vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04]
; X86-NEXT:    vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
; X64-NEXT:    vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04]
; X64-NEXT:    vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 4, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 5, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
; X86-NEXT:    vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04]
; X86-NEXT:    vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
; X64-NEXT:    vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04]
; X64-NEXT:    vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 4, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 5, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
; X86-NEXT:    vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04]
; X86-NEXT:    vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
; X64-NEXT:    vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04]
; X64-NEXT:    vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 4, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 5, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
; X86-NEXT:    vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04]
; X86-NEXT:    vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
; X64-NEXT:    vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04]
; X64-NEXT:    vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 4, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 5, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}
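
; Hand-written sketch (no CHECK lines; assumed equivalence): rotate-by-
; immediate is the same funnel shift with a splatted constant amount, so
; vprold $3 corresponds to @llvm.fshl by <3, 3, 3, 3> (and vprord to
; @llvm.fshr). Illustrative only.
define <4 x i32> @prol_d_128_funnel_sketch(<4 x i32> %x0) {
  ; fshl(a, a, 3) rotates each 32-bit lane left by three bits.
  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x0, <4 x i32> %x0, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)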

declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_prolv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prolv_d_128_old:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_128_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_128_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_prolv_d_128_old(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_128_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_128_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_prolv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prolv_d_256_old:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_256_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_256_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

define <8 x i32>@test_int_x86_avx512_maskz_prolv_d_256_old(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_256_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_256_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_prolv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prolv_q_128_old:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_128_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_128_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_maskz_prolv_q_128_old(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_128_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_128_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_prolv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prolv_q_256_old:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_256_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_256_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_maskz_prolv_q_256_old(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_256_old:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_256_old:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  ret <4 x i64> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
; X86-NEXT:    vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04]
; X86-NEXT:    vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
; X64-NEXT:    vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04]
; X64-NEXT:    vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 4, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 5, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
; X86-NEXT:    vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04]
; X86-NEXT:    vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
; X64-NEXT:    vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04]
; X64-NEXT:    vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 4, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 5, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
; X86-NEXT:    vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04]
; X86-NEXT:    vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
; X64-NEXT:    vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04]
; X64-NEXT:    vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 4, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 5, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
; X86-NEXT:    vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04]
; X86-NEXT:    vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
; X64-NEXT:    vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04]
; X64-NEXT:    vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 4, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 5, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx512_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prorv_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1)
  ret <4 x i32> %1
}

define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
  ret <4 x i32> %3
}
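
; NOTE: the bitcast-plus-shufflevector sequence above is how an i8 mask is
; narrowed to the live lanes: all eight mask bits become <8 x i1>, then the
; shuffle keeps elements 0-3 for the <4 x i32> selects (and elements 0-1 for
; the <2 x i64> cases further below).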

define <4 x i32>@test_int_x86_avx512_maskz_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
14394; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_128:
14395; X86:       # %bb.0:
14396; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
14397; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
14398; X86-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
14399; X86-NEXT:    retl # encoding: [0xc3]
14400;
14401; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_128:
14402; X64:       # %bb.0:
14403; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
14404; X64-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
14405; X64-NEXT:    retq # encoding: [0xc3]
14406  %1 = call <4 x i32> @llvm.x86.avx512.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1)
14407  %2 = bitcast i8 %x3 to <8 x i1>
14408  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
14409  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer
14410  ret <4 x i32> %3
14411}
14412
declare <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx512_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prorv_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1)
  ret <8 x i32> %1
}

define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
  ret <8 x i32> %3
}

define <8 x i32>@test_int_x86_avx512_maskz_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx512.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
  ret <8 x i32> %3
}

declare <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64>, <2 x i64>)

define <2 x i64>@test_int_x86_avx512_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prorv_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1)
  ret <2 x i64> %1
}

define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
  ret <2 x i64> %3
}

define <2 x i64>@test_int_x86_avx512_maskz_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.x86.avx512.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> zeroinitializer
  ret <2 x i64> %3
}

declare <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64>, <4 x i64>)

define <4 x i64>@test_int_x86_avx512_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prorv_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1)
  ret <4 x i64> %1
}

define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
  ret <4 x i64> %3
}

define <4 x i64>@test_int_x86_avx512_maskz_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prorv_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prorv_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.x86.avx512.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> zeroinitializer
  ret <4 x i64> %3
}

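; Immediate rotate-left (vprold/vprolq) tests. Each function folds three calls
; together: a merge-masked rotate by 3, a zero-masked rotate by 4 and an
; unmasked rotate by 5, summed so all three encodings are checked in one run.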
declare <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32>, i32)

define <4 x i32>@test_int_x86_avx512_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
; X86-NEXT:    vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04]
; X86-NEXT:    vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
; X64-NEXT:    vprold $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc8,0x04]
; X64-NEXT:    vprold $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc8,0x05]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
  %4 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
  %7 = call <4 x i32> @llvm.x86.avx512.prol.d.128(<4 x i32> %x0, i32 5)
  %res3 = add <4 x i32> %3, %6
  %res4 = add <4 x i32> %res3, %7
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32>, i32)

define <8 x i32>@test_int_x86_avx512_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
; X86-NEXT:    vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04]
; X86-NEXT:    vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
; X64-NEXT:    vprold $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc8,0x04]
; X64-NEXT:    vprold $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc8,0x05]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
  %4 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %7 = call <8 x i32> @llvm.x86.avx512.prol.d.256(<8 x i32> %x0, i32 5)
  %res3 = add <8 x i32> %3, %6
  %res4 = add <8 x i32> %res3, %7
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64>, i32)

define <2 x i64>@test_int_x86_avx512_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
; X86-NEXT:    vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04]
; X86-NEXT:    vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
; X64-NEXT:    vprolq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc8,0x04]
; X64-NEXT:    vprolq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc8,0x05]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
  %4 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
  %7 = call <2 x i64> @llvm.x86.avx512.prol.q.128(<2 x i64> %x0, i32 5)
  %res3 = add <2 x i64> %3, %6
  %res4 = add <2 x i64> %res3, %7
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64>, i32)

define <4 x i64>@test_int_x86_avx512_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_prol_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
; X86-NEXT:    vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04]
; X86-NEXT:    vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_prol_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
; X64-NEXT:    vprolq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc8,0x04]
; X64-NEXT:    vprolq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc8,0x05]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
  %4 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
  %7 = call <4 x i64> @llvm.x86.avx512.prol.q.256(<4 x i64> %x0, i32 5)
  %res3 = add <4 x i64> %3, %6
  %res4 = add <4 x i64> %res3, %7
  ret <4 x i64> %res4
}

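; Variable rotate-left (vprolvd/vprolvq) tests, structured like the vprorv*
; tests above: unmasked, merge-masked and zero-masked variants per width.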
declare <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32>, <4 x i32>)

define <4 x i32>@test_int_x86_avx512_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prolv_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1)
  ret <4 x i32> %1
}

define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
  ret <4 x i32> %3
}

define <4 x i32>@test_int_x86_avx512_maskz_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.x86.avx512.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> zeroinitializer
  ret <4 x i32> %3
}

declare <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32>, <8 x i32>)

define <8 x i32>@test_int_x86_avx512_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_prolv_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1)
  ret <8 x i32> %1
}

define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
  ret <8 x i32> %3
}

define <8 x i32>@test_int_x86_avx512_maskz_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx512.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
  ret <8 x i32> %3
}

declare <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64>, <2 x i64>)

define <2 x i64>@test_int_x86_avx512_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prolv_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1)
  ret <2 x i64> %1
}

define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
  ret <2 x i64> %3
}

define <2 x i64>@test_int_x86_avx512_maskz_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.x86.avx512.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> zeroinitializer
  ret <2 x i64> %3
}

declare <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64>, <4 x i64>)

define <4 x i64>@test_int_x86_avx512_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_prolv_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1)
  ret <4 x i64> %1
}

define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
  ret <4 x i64> %3
}

define <4 x i64>@test_int_x86_avx512_maskz_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_prolv_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_prolv_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.x86.avx512.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> zeroinitializer
  ret <4 x i64> %3
}

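; Immediate rotate-right (vprord/vprorq) tests, mirroring the vprol* tests:
; rotate amounts 3 (merge-masked), 4 (zero-masked) and 5 (unmasked), summed.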
declare <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32>, i32)

define <4 x i32>@test_int_x86_avx512_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
; X86-NEXT:    vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04]
; X86-NEXT:    vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
; X64-NEXT:    vprord $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x72,0xc0,0x04]
; X64-NEXT:    vprord $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x72,0xc0,0x05]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x2
  %4 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract, <4 x i32> %4, <4 x i32> zeroinitializer
  %7 = call <4 x i32> @llvm.x86.avx512.pror.d.128(<4 x i32> %x0, i32 5)
  %res3 = add <4 x i32> %3, %6
  %res4 = add <4 x i32> %res3, %7
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32>, i32)

define <8 x i32>@test_int_x86_avx512_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
; X86-NEXT:    vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04]
; X86-NEXT:    vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
; X64-NEXT:    vprord $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x72,0xc0,0x04]
; X64-NEXT:    vprord $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x72,0xc0,0x05]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x2
  %4 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer
  %7 = call <8 x i32> @llvm.x86.avx512.pror.d.256(<8 x i32> %x0, i32 5)
  %res3 = add <8 x i32> %3, %6
  %res4 = add <8 x i32> %res3, %7
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64>, i32)

define <2 x i64>@test_int_x86_avx512_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
; X86-NEXT:    vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04]
; X86-NEXT:    vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
; X64-NEXT:    vprorq $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0x89,0x72,0xc0,0x04]
; X64-NEXT:    vprorq $5, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x72,0xc0,0x05]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract1, <2 x i64> %1, <2 x i64> %x2
  %4 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <2 x i32> <i32 0, i32 1>
  %6 = select <2 x i1> %extract, <2 x i64> %4, <2 x i64> zeroinitializer
  %7 = call <2 x i64> @llvm.x86.avx512.pror.q.128(<2 x i64> %x0, i32 5)
  %res3 = add <2 x i64> %3, %6
  %res4 = add <2 x i64> %res3, %7
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64>, i32)

define <4 x i64>@test_int_x86_avx512_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_pror_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
; X86-NEXT:    vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04]
; X86-NEXT:    vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pror_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
; X64-NEXT:    vprorq $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xed,0xa9,0x72,0xc0,0x04]
; X64-NEXT:    vprorq $5, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x72,0xc0,0x05]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 3)
  %2 = bitcast i8 %x3 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i64> %1, <4 x i64> %x2
  %4 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 4)
  %5 = bitcast i8 %x3 to <8 x i1>
  %extract = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %6 = select <4 x i1> %extract, <4 x i64> %4, <4 x i64> zeroinitializer
  %7 = call <4 x i64> @llvm.x86.avx512.pror.q.256(<4 x i64> %x0, i32 5)
  %res3 = add <4 x i64> %3, %6
  %res4 = add <4 x i64> %res3, %7
  ret <4 x i64> %res4
}

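; Fused multiply-add tests. An all-ones mask (i8 -1) lowers to the plain
; vfmadd213 form; with a live mask the backend selects vfmadd132 so that the
; passthrough operand is already in the destination register.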
declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_vfmadd256_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd256_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
; X86-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd256_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
; X64-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_vfmadd128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
; X86-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
; X64-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double> @test_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_fmadd256_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 -1)
  ret <4 x double> %res
}

define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
; X86-LABEL: test_mask_fmadd256_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
; X86-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmadd256_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
; X64-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double> @test_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_fmadd128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1)
  ret <2 x double> %res
}

define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; X86-LABEL: test_mask_fmadd128_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
; X86-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmadd128_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
; X64-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
  ret <2 x double> %res
}

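; mask3/maskz FMA variants: mask3 merges the result into the addend operand
; (vfmadd231 into the third source, then a move back to the return register),
; while maskz zeroes the inactive lanes (vfmadd213 with {z}).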
declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
; X86-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
; X64-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
; X86-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
; X64-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) + xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
; X86-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
; X64-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) + ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
; X86-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
; X64-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}


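; Fused multiply-subtract, mask3 form: result = (src1 * src2) - src3, merged
; into the subtrahend register via vfmsub231.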
declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}


declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) - ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

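; vfnmadd/vfnmsub tests: the unmasked calls compress to the VEX-encoded 213
; form, while the masked calls keep the EVEX encoding and select the 132 form,
; merging into the first operand under %k1.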
declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_vfnmadd256_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd256_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
; X86-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd256_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
; X64-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_vfnmadd128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd128_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
; X86-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd128_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
; X64-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

define <4 x double> @test_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_vfnmadd256_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd256_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
; X86-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd256_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
; X64-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_vfnmadd128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd128_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
; X86-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd128_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
; X64-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_vfnmsub256_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) - ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub256_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
; X86-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub256_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
; X64-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_vfnmsub128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub128_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
; X86-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub128_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
; X64-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

define <4 x double> @test_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_vfnmsub256_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) - ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub256_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
; X86-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub256_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
; X64-NEXT:    # ymm0 {%k1} = -(ymm0 * ymm1) - ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_vfnmsub128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub128_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
; X86-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub128_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
; X64-NEXT:    # xmm0 {%k1} = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

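; mask3 variants of vfnmsub: as above, the 231 form writes the masked result
; into the addend register, which is then copied to the return register.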
declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
; X86-NEXT:    # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
; X64-NEXT:    # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
; X86-NEXT:    # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
; X64-NEXT:    # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
; X86-NEXT:    # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
; X64-NEXT:    # xmm2 {%k1} = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
; X86-NEXT:    # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
; X64-NEXT:    # ymm2 {%k1} = -(ymm0 * ymm1) - ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

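; vfmaddsub tests: the multiply result alternately adds and subtracts the
; third operand across elements, shown as "+/-" in the disassembly comments.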
declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_fmaddsub256_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
; X86-LABEL: test_mask_fmaddsub256_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
; X86-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmaddsub256_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
; X64-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_fmaddsub128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; X86-LABEL: test_mask_fmaddsub128_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
; X86-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmaddsub128_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
; X64-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

define <4 x double> @test_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_vfmaddsub256_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmaddsub256_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
; X86-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmaddsub256_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
; X64-NEXT:    # ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_vfmaddsub128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmaddsub128_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
; X86-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmaddsub128_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
; X64-NEXT:    # xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
; X86-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
; X64-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
; X86-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
; X64-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
; X86-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
; X64-NEXT:    # xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
; X86-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
; X64-NEXT:    # ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

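; mask3 vfmsubadd tests: the opposite interleave ("-/+" in the comments),
; again returning the result through the addend register.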
declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
; X86-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
; X64-NEXT:    # xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
; X86-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
; X64-NEXT:    # ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}


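; The remaining tests exercise folding of memory operands into the FMA:
; rmk/rmkz fold a full-vector load (the *a variants at reduced alignment),
; and the rmb* tests build a scalar splat that folds to the {1to4} broadcast
; form, which has no VEX equivalent and stays EVEX-encoded.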
16199define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
16200; X86-LABEL: test_mask_vfmadd128_ps_rmk:
16201; X86:       # %bb.0:
16202; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
16203; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
16204; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
16205; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
16206; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
16207; X86-NEXT:    retl # encoding: [0xc3]
16208;
16209; X64-LABEL: test_mask_vfmadd128_ps_rmk:
16210; X64:       # %bb.0:
16211; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
16212; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
16213; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
16214; X64-NEXT:    retq # encoding: [0xc3]
16215  %a2 = load <4 x float>, <4 x float>* %ptr_a2
16216  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
16217  ret <4 x float> %res
16218}
16219
16220define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
16221; X86-LABEL: test_mask_vfmadd128_ps_rmka:
16222; X86:       # %bb.0:
16223; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
16224; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
16225; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
16226; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
16227; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
16228; X86-NEXT:    retl # encoding: [0xc3]
16229;
16230; X64-LABEL: test_mask_vfmadd128_ps_rmka:
16231; X64:       # %bb.0:
16232; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
16233; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
16234; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
16235; X64-NEXT:    retq # encoding: [0xc3]
16236  %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
16237  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
16238  ret <4 x float> %res
16239}
16240
16241define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
16242; X86-LABEL: test_mask_vfmadd128_ps_rmkz:
16243; X86:       # %bb.0:
16244; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
16245; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
16246; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
16247; X86-NEXT:    retl # encoding: [0xc3]
16248;
16249; X64-LABEL: test_mask_vfmadd128_ps_rmkz:
16250; X64:       # %bb.0:
16251; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
16252; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
16253; X64-NEXT:    retq # encoding: [0xc3]
16254  %a2 = load <4 x float>, <4 x float>* %ptr_a2
16255  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
16256  ret <4 x float> %res
16257}
16258
16259define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
16260; X86-LABEL: test_mask_vfmadd128_ps_rmkza:
16261; X86:       # %bb.0:
16262; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
16263; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
16264; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
16265; X86-NEXT:    retl # encoding: [0xc3]
16266;
16267; X64-LABEL: test_mask_vfmadd128_ps_rmkza:
16268; X64:       # %bb.0:
16269; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
16270; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
16271; X64-NEXT:    retq # encoding: [0xc3]
16272  %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
16273  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
16274  ret <4 x float> %res
16275}
16276
16277define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
16278; X86-LABEL: test_mask_vfmadd128_ps_rmb:
16279; X86:       # %bb.0:
16280; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
16281; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
16282; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
16283; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
16284; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
16285; X86-NEXT:    retl # encoding: [0xc3]
16286;
16287; X64-LABEL: test_mask_vfmadd128_ps_rmb:
16288; X64:       # %bb.0:
16289; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
16290; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
16291; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
16292; X64-NEXT:    retq # encoding: [0xc3]
16293  %q = load float, float* %ptr_a2
16294  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
16295  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
16296  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
16297  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
16298  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
16299  ret <4 x float> %res
16300}
16301
16302define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
16303; X86-LABEL: test_mask_vfmadd128_ps_rmba:
16304; X86:       # %bb.0:
16305; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
16306; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
16307; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
16308; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
16309; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
16310; X86-NEXT:    retl # encoding: [0xc3]
16311;
16312; X64-LABEL: test_mask_vfmadd128_ps_rmba:
16313; X64:       # %bb.0:
16314; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
16315; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
16316; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
16317; X64-NEXT:    retq # encoding: [0xc3]
16318  %q = load float, float* %ptr_a2, align 4
16319  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
16320  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
16321  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
16322  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
16323  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
16324  ret <4 x float> %res
16325}
16326
16327define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
16328; X86-LABEL: test_mask_vfmadd128_ps_rmbz:
16329; X86:       # %bb.0:
16330; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
16331; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
16332; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
16333; X86-NEXT:    retl # encoding: [0xc3]
16334;
16335; X64-LABEL: test_mask_vfmadd128_ps_rmbz:
16336; X64:       # %bb.0:
16337; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
16338; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
16339; X64-NEXT:    retq # encoding: [0xc3]
16340  %q = load float, float* %ptr_a2
16341  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
16342  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
16343  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
16344  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
16345  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
16346  ret <4 x float> %res
16347}
16348
define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_ps_rmbza:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmbza:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_a2, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
  ret <4 x float> %res
}

define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_pd_rmk:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd213pd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x00]
; X86-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_pd_rmk:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
; X64-NEXT:    # xmm0 {%k1} = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <2 x double>, <2 x double>* %ptr_a2
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_pd_rmkz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfmadd213pd (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_pd_rmkz:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <2 x double>, <2 x double>* %ptr_a2
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd256_pd_rmk:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd213pd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x00]
; X86-NEXT:    # ymm0 {%k1} = (ymm1 * ymm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd256_pd_rmk:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
; X64-NEXT:    # ymm0 {%k1} = (ymm1 * ymm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <4 x double>, <4 x double>* %ptr_a2
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd256_pd_rmkz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfmadd213pd (%eax), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x00]
; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd256_pd_rmkz:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <4 x double>, <4 x double>* %ptr_a2
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

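; vpmovqd truncation tests: unmasked, merge-masked (into %x1), and
; zero-masked. Since the source is a ymm register, a vzeroupper is expected
; before each return.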
declare <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_pmov_qd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovqd %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x35,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_qd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovqd %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x35,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32>@test_int_x86_avx512_maskz_pmov_qd_256(<4 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_pmov_qd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmov_qd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovqd %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x35,0xc0]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

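; 128-bit compress/expand tests. Each intrinsic is exercised with merge
; masking (result blended into the passthru register), zero masking ({z},
; in place), and an all-ones mask, where the operation is a no-op and
; should fold to a bare ret.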
define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
  ret <2 x double> %res
}

define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
  ret <2 x double> %res
}

define <2 x double> @test_compress_pd_128(<2 x double> %data) {
; CHECK-LABEL: test_compress_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)

define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_compress_ps_128(<4 x float> %data) {
; CHECK-LABEL: test_compress_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)

define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_compress_q_128(<2 x i64> %data) {
; CHECK-LABEL: test_compress_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)

define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_compress_d_128(<4 x i32> %data) {
; CHECK-LABEL: test_compress_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)

define <2 x double> @test_expand_pd_128(<2 x double> %data) {
; CHECK-LABEL: test_expand_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> undef, i8 -1)
  ret <2 x double> %res
}

define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask)
  ret <2 x double> %res
}

define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask)

define <4 x float> @test_expand_ps_128(<4 x float> %data) {
; CHECK-LABEL: test_expand_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> undef, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)

define <2 x i64> @test_expand_q_128(<2 x i64> %data) {
; CHECK-LABEL: test_expand_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask)

define <4 x i32> @test_expand_d_128(<4 x i32> %data) {
; CHECK-LABEL: test_expand_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)

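; The same compress/expand matrix at 256-bit width; only the register class
; (ymm) and the EVEX vector-length bits in the encodings change.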
define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
  ret <4 x double> %res
}

define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}

define <4 x double> @test_compress_pd_256(<4 x double> %data) {
; CHECK-LABEL: test_compress_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)

define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_compress_ps_256(<8 x float> %data) {
; CHECK-LABEL: test_compress_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)

define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
; CHECK-LABEL: test_compress_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)

define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
; X86-LABEL: test_maskz_compress_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
; CHECK-LABEL: test_compress_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)

define <4 x double> @test_expand_pd_256(<4 x double> %data) {
; CHECK-LABEL: test_expand_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
  ret <4 x double> %res
}

define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
  ret <4 x double> %res
}

define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)

define <8 x float> @test_expand_ps_256(<8 x float> %data) {
; CHECK-LABEL: test_expand_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)

define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
; CHECK-LABEL: test_expand_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)

define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
; CHECK-LABEL: test_expand_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
; X86-LABEL: test_mask_expand_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
; X86-LABEL: test_maskz_expand_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)

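; Mask-concatenation tests: two vcmpltps mask results are combined into one
; wider mask (kshiftlw+korw for 4+4 bits, kunpckbw for 8+8 bits) and then
; drive a masked store of zeros.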
define void @test_cmp_128(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* %p) {
; X86-LABEL: test_cmp_128:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp # encoding: [0x55]
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-16, %esp # encoding: [0x83,0xe4,0xf0]
; X86-NEXT:    subl $16, %esp # encoding: [0x83,0xec,0x10]
; X86-NEXT:    movl 24(%ebp), %eax # encoding: [0x8b,0x45,0x18]
; X86-NEXT:    vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
; X86-NEXT:    vcmpltps 8(%ebp), %xmm2, %k1 # encoding: [0x62,0xf1,0x6c,0x08,0xc2,0x8d,0x08,0x00,0x00,0x00,0x01]
; X86-NEXT:    kshiftlw $4, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x04]
; X86-NEXT:    korw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x45,0xc9]
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; X86-NEXT:    vmovaps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x00]
; X86-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-NEXT:    popl %ebp # encoding: [0x5d]
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_cmp_128:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vcmpltps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x01]
; X64-NEXT:    vcmpltps %xmm3, %xmm2, %k1 # encoding: [0x62,0xf1,0x6c,0x08,0xc2,0xcb,0x01]
; X64-NEXT:    kshiftlw $4, %k1, %k1 # encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x04]
; X64-NEXT:    korw %k1, %k0, %k1 # encoding: [0xc5,0xfc,0x45,0xc9]
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; X64-NEXT:    vmovaps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 1)
  %1 = tail call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %c, <4 x float> %d, i32 1)
  %2 = bitcast float* %p to <8 x float>*
  %3 = shufflevector <4 x i1> %0, <4 x i1> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  tail call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> zeroinitializer, <8 x float>* %2, i32 64, <8 x i1> %3)
  ret void
}

define void @test_cmp_256(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, float* %p) {
; X86-LABEL: test_cmp_256:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp # encoding: [0x55]
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-32, %esp # encoding: [0x83,0xe4,0xe0]
; X86-NEXT:    subl $32, %esp # encoding: [0x83,0xec,0x20]
; X86-NEXT:    movl 40(%ebp), %eax # encoding: [0x8b,0x45,0x28]
; X86-NEXT:    vcmpltps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x01]
; X86-NEXT:    vcmpltps 8(%ebp), %ymm2, %k1 # encoding: [0x62,0xf1,0x6c,0x28,0xc2,0x8d,0x08,0x00,0x00,0x00,0x01]
; X86-NEXT:    kunpckbw %k0, %k1, %k1 # encoding: [0xc5,0xf5,0x4b,0xc8]
; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; X86-NEXT:    vmovaps %zmm0, (%eax) {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x29,0x00]
; X86-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
; X86-NEXT:    popl %ebp # encoding: [0x5d]
; X86-NEXT:    .cfi_def_cfa %esp, 4
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_cmp_256:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vcmpltps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x01]
; X64-NEXT:    vcmpltps %ymm3, %ymm2, %k1 # encoding: [0x62,0xf1,0x6c,0x28,0xc2,0xcb,0x01]
; X64-NEXT:    kunpckbw %k0, %k1, %k1 # encoding: [0xc5,0xf5,0x4b,0xc8]
; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0]
; X64-NEXT:    vmovaps %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x49,0x29,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
entry:
  %0 = tail call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 1)
  %1 = tail call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %c, <8 x float> %d, i32 1)
  %2 = bitcast float* %p to <16 x float>*
  %3 = shufflevector <8 x i1> %0, <8 x i1> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> zeroinitializer, <16 x float>* %2, i32 64, <16 x i1> %3)
  ret void
}

declare <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float>, <4 x float>, i32)
declare <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float>, <8 x float>, i32)
declare void @llvm.masked.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, i32, <8 x i1>)
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)