use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::{self, transmute},
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30)
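///
/// A minimal sketch of the lane-wise behavior (illustrative values, not a
/// doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi16(-7);
/// let r = _mm512_abs_epi16(a);
/// // every 16-bit lane of r now holds 7
/// ```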
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    let a = a.as_i16x32();
    // all-0 is a properly initialized i16x32
    let zero: i16x32 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i16x32 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi16&expand=31)
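///
/// A minimal sketch of the writemask behavior (illustrative values, not a
/// doctest); bit i of k selects between the computed lane and the lane of src:
///
/// ```ignore
/// let src = _mm512_set1_epi16(1);
/// let a = _mm512_set1_epi16(-7);
/// let r = _mm512_mask_abs_epi16(src, 0b01, a);
/// // lane 0 of r is 7; lanes 1..32 are copied from src and hold 1
/// ```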
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi16&expand=32)
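///
/// A minimal sketch of the zeromask behavior (illustrative values, not a
/// doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi16(-7);
/// let r = _mm512_maskz_abs_epi16(0b11, a);
/// // lanes 0 and 1 of r are 7; lanes 2..32 are zeroed
/// ```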
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi16(a).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi16(a).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi16(a).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi8&expand=57)
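///
/// Note the wrap-around edge case, sketched here with illustrative values
/// (not a doctest): the unsigned result for i8::MIN is 128, whose bit
/// pattern reinterpreted as a signed byte is still -128.
///
/// ```ignore
/// let a = _mm512_set1_epi8(i8::MIN);
/// let r = _mm512_abs_epi8(a);
/// // every lane of r holds the bit pattern 0x80 (128 as an unsigned byte)
/// ```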
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    let a = a.as_i8x64();
    // all-0 is a properly initialized i8x64
    let zero: i8x64 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i8x64 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi8&expand=58)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi8(a).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi8(a).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi8(a).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi16&expand=91)
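///
/// The addition wraps on overflow; for saturating behavior see
/// `_mm512_adds_epi16`. A minimal sketch (illustrative values, not a doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi16(i16::MAX);
/// let b = _mm512_set1_epi16(1);
/// let r = _mm512_add_epi16(a, b);
/// // every lane wraps around to i16::MIN (-32768)
/// ```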
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i16x32(), b.as_i16x32()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi16&expand=92)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, add, src.as_i16x32()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, add, src.as_i16x16()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, add, src.as_i16x8()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi8&expand=118)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i8x64(), b.as_i8x64()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi8&expand=119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, add, src.as_i8x64()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, add, src.as_i8x32()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, add, src.as_i8x16()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epu16&expand=197)
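///
/// A minimal sketch of the unsigned saturation (illustrative values, not a
/// doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi16(-1); // every lane is 0xFFFF, i.e. 65535 unsigned
/// let b = _mm512_set1_epi16(1);
/// let r = _mm512_adds_epu16(a, b);
/// // every lane saturates at 65535 instead of wrapping to 0
/// ```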
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epu16&expand=198)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_mask_adds_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpaddusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        k,
    ))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_mask_adds_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpaddusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        src.as_u16x16(),
        k,
    ))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        _mm256_setzero_si256().as_u16x16(),
        k,
    ))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusw128(
        a.as_u16x8(),
        b.as_u16x8(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epu8&expand=206)
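///
/// A minimal sketch of the unsigned saturation (illustrative values, not a
/// doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi8(-56); // every lane is 0xC8, i.e. 200 unsigned
/// let b = _mm512_set1_epi8(100);
/// let r = _mm512_adds_epu8(a, b);
/// // 200 + 100 saturates to 255 (0xFF) in every lane
/// ```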
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epu8&expand=207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        k,
    ))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddusb256(
        a.as_u8x32(),
        b.as_u8x32(),
        _mm256_setzero_si256().as_u8x32(),
        k,
    ))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epu8&expand=202)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusb128(
        a.as_u8x16(),
        b.as_u8x16(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epi16&expand=179)
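///
/// A minimal sketch of the signed saturation (illustrative values, not a
/// doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi16(i16::MAX - 1);
/// let b = _mm512_set1_epi16(10);
/// let r = _mm512_adds_epi16(a, b);
/// // every lane saturates at i16::MAX (32767) instead of wrapping
/// ```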
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epi16&expand=180)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_mask_adds_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpaddsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epi16&expand=177)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_mask_adds_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpaddsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epi16&expand=178)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddsw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epi16&expand=174)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epi16&expand=175)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsw128(
        a.as_i16x8(),
        b.as_i16x8(),
        _mm_setzero_si128().as_i16x8(),
        k,
    ))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epi8&expand=188)
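///
/// A minimal sketch of the signed saturation (illustrative values, not a
/// doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi8(-120);
/// let b = _mm512_set1_epi8(-20);
/// let r = _mm512_adds_epi8(a, b);
/// // -140 saturates to i8::MIN (-128) in every lane
/// ```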
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epi8&expand=189)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epi8&expand=190)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        k,
    ))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epi8&expand=186)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epi8&expand=187)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddsb256(
        a.as_i8x32(),
        b.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epi8&expand=183)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epi8&expand=184)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsb128(
        a.as_i8x16(),
        b.as_i8x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi16&expand=5685)
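///
/// The subtraction wraps on overflow; for saturating behavior see
/// `_mm512_subs_epi16`. A minimal sketch (illustrative values, not a doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi16(i16::MIN);
/// let b = _mm512_set1_epi16(1);
/// let r = _mm512_sub_epi16(a, b);
/// // every lane wraps around to i16::MAX (32767)
/// ```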
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i16x32(), b.as_i16x32()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi16&expand=5683)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi16&expand=5684)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_epi16&expand=5680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_epi16&expand=5681)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_epi16&expand=5677)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_epi16&expand=5678)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi8&expand=5712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i8x64(), b.as_i8x64()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi8&expand=5710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi8&expand=5711)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_epi8&expand=5707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_epi8&expand=5708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_epi8&expand=5704)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_epi8&expand=5705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epu16&expand=5793)
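///
/// A minimal sketch of the unsigned saturation (illustrative values, not a
/// doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let b = _mm512_set1_epi16(2);
/// let r = _mm512_subs_epu16(a, b);
/// // 1 - 2 saturates to 0 in every lane rather than wrapping
/// ```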
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epu16&expand=5791)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_mask_subs_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpsubusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epu16&expand=5792)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        k,
    ))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epu16&expand=5788)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_mask_subs_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpsubusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        src.as_u16x16(),
        k,
    ))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epu16&expand=5789)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        _mm256_setzero_si256().as_u16x16(),
        k,
    ))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epu16&expand=5785)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epu16&expand=5786)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusw128(
        a.as_u16x8(),
        b.as_u16x8(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epu8&expand=5802)
987 #[inline]
988 #[target_feature(enable = "avx512bw")]
989 #[cfg_attr(test, assert_instr(vpsubusb))]
_mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i990 pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
991     transmute(vpsubusb(
992         a.as_u8x64(),
993         b.as_u8x64(),
994         _mm512_setzero_si512().as_u8x64(),
995         0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
996     ))
997 }
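
// NOTE (illustrative sketch, not part of the original source): the same
// clamp-at-zero behavior at 8-bit width, like `u8::saturating_sub`:
//
//     assert_eq!(10u8.saturating_sub(20), 0); // wrapping_sub would give 246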

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epu8&expand=5800)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epu8&expand=5801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        k,
    ))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epu8&expand=5797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epu8&expand=5798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubusb256(
        a.as_u8x32(),
        b.as_u8x32(),
        _mm256_setzero_si256().as_u8x32(),
        k,
    ))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epu8&expand=5794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epu8&expand=5795)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusb128(
        a.as_u8x16(),
        b.as_u8x16(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epi16&expand=5775)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
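
// NOTE (illustrative sketch, not from the original source): signed saturation
// clamps to the i16 range instead of wrapping, like `i16::saturating_sub`:
//
//     assert_eq!(i16::MIN.saturating_sub(1), i16::MIN); // wrapping would give i16::MAX
//     assert_eq!(i16::MAX.saturating_sub(-1), i16::MAX);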

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epi16&expand=5773)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_mask_subs_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpsubsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epi16&expand=5774)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epi16&expand=5770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm256_mask_subs_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpsubsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epi16&expand=5771)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubsw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epi16&expand=5767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epi16&expand=5768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubsw128(
        a.as_i16x8(),
        b.as_i16x8(),
        _mm_setzero_si128().as_i16x8(),
        k,
    ))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epi8&expand=5784)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}
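
// NOTE (illustrative sketch, not from the original source): the 8-bit signed
// variant clamps to the i8 range, like `i8::saturating_sub`:
//
//     assert_eq!((-128i8).saturating_sub(1), -128); // wrapping would give 127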

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epi8&expand=5782)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epi8&expand=5783)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        k,
    ))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epi8&expand=5779)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epi8&expand=5780)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubsb256(
        a.as_i8x32(),
        b.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epi8&expand=5776)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k))
}

/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epi8&expand=5777)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubsb))]
pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubsb128(
        a.as_i8x16(),
        b.as_i8x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhi_epu16&expand=3973)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmulhuw(a.as_u16x32(), b.as_u16x32()))
}
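
// NOTE (illustrative sketch, not from the original source): each lane holds
// the upper half of a full 32-bit unsigned product. For example,
// 0xFFFF * 0xFFFF == 0xFFFE_0001, so the stored lane is 0xFFFE.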

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhi_epu16&expand=3971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm512_mask_mulhi_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhi_epu16&expand=3972)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
    let zero = _mm512_setzero_si512().as_u16x32();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mulhi_epu16&expand=3968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm256_mask_mulhi_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mulhi_epu16&expand=3969)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
    let zero = _mm256_setzero_si256().as_u16x16();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mulhi_epu16&expand=3965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhi_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
}

/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mulhi_epu16&expand=3966)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
pub unsafe fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhi_epu16(a, b).as_u16x8();
    let zero = _mm_setzero_si128().as_u16x8();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhi_epi16&expand=3962)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmulhw(a.as_i16x32(), b.as_i16x32()))
}
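
// NOTE (illustrative sketch, not from the original source): here the 32-bit
// intermediate product is signed. For example, (-32768) * (-32768) ==
// 0x4000_0000, so the stored high lane is 0x4000 (16384).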

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhi_epi16&expand=3960)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm512_mask_mulhi_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhi_epi16&expand=3961)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mulhi_epi16&expand=3957)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm256_mask_mulhi_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mulhi_epi16&expand=3958)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mulhi_epi16&expand=3954)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhi_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
}

/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mulhi_epi16&expand=3955)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhw))]
pub unsafe fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhi_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhrs_epi16&expand=3986)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32()))
}
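
// NOTE (worked example, an illustration not present in the original source):
// treating lanes as Q15 fixed point, a = 0x4000 (0.5) and b = 0x2000 (0.25)
// give the 32-bit product 0x0800_0000; shifting right by 14, adding 1, and
// shifting right by 1 yields 0x1000 (0.125), i.e. a correctly rounded Q15
// product.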

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhrs_epi16&expand=3984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm512_mask_mulhrs_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhrs_epi16&expand=3985)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mulhrs_epi16&expand=3981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm256_mask_mulhrs_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mulhrs_epi16&expand=3982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mulhrs_epi16&expand=3978)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mulhrs_epi16&expand=3979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
pub unsafe fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullo_epi16&expand=3996)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_mul(a.as_i16x32(), b.as_i16x32()))
}
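
// NOTE (illustrative sketch, not from the original source): only the low 16
// bits of each 32-bit product are kept, which is plain wrapping
// multiplication. For example, 300 * 300 == 90000 == 0x0001_5F90, so the
// stored lane is 0x5F90 (24464); the low half is the same whether the inputs
// are interpreted as signed or unsigned.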

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullo_epi16&expand=3994)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm512_mask_mullo_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mullo_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mullo_epi16&expand=3995)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mullo_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mullo_epi16&expand=3991)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm256_mask_mullo_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let mul = _mm256_mullo_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mullo_epi16&expand=3992)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let mul = _mm256_mullo_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mullo_epi16&expand=3988)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mullo_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
}

/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mullo_epi16&expand=3989)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmullw))]
pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let mul = _mm_mullo_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu16&expand=3609)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxuw(a.as_u16x32(), b.as_u16x32()))
}
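
// NOTE (illustrative sketch, not from the original source): the comparison is
// unsigned, so 0xFFFF is the largest possible lane value. E.g.
// max(0xFFFF, 0x0001) == 0xFFFF here, whereas the signed epi16 variant below
// would pick 0x0001 because 0xFFFF reads as -1.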

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu16&expand=3607)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, max, src.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu16&expand=3608)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu16(a, b).as_u16x32();
    let zero = _mm512_setzero_si512().as_u16x32();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epu16&expand=3604)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, max, src.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epu16&expand=3605)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epu16(a, b).as_u16x16();
    let zero = _mm256_setzero_si256().as_u16x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epu16&expand=3601)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, max, src.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epu16&expand=3602)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epu16(a, b).as_u16x8();
    let zero = _mm_setzero_si128().as_u16x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu8&expand=3636)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxub(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu8&expand=3634)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, max, src.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu8&expand=3635)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu8(a, b).as_u8x64();
    let zero = _mm512_setzero_si512().as_u8x64();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epu8&expand=3631)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, max, src.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epu8&expand=3632)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epu8(a, b).as_u8x32();
    let zero = _mm256_setzero_si256().as_u8x32();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epu8&expand=3628)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, max, src.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epu8&expand=3629)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxub))]
pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epu8(a, b).as_u8x16();
    let zero = _mm_setzero_si128().as_u8x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi16&expand=3573)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxsw(a.as_i16x32(), b.as_i16x32()))
}
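
// NOTE (illustrative sketch, not from the original source): signed lane-wise
// maximum. E.g. max(-1i16, 1) == 1, while the unsigned epu16 form above
// returns 0xFFFF for the same bit patterns.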
1787 
1788 /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1789 ///
1790 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi16&expand=3571)
1791 #[inline]
1792 #[target_feature(enable = "avx512bw")]
1793 #[cfg_attr(test, assert_instr(vpmaxsw))]
_mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i1794 pub unsafe fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1795     let max = _mm512_max_epi16(a, b).as_i16x32();
1796     transmute(simd_select_bitmask(k, max, src.as_i16x32()))
1797 }
1798 
1799 /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1800 ///
1801 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi16&expand=3572)
1802 #[inline]
1803 #[target_feature(enable = "avx512bw")]
1804 #[cfg_attr(test, assert_instr(vpmaxsw))]
_mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i1805 pub unsafe fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1806     let max = _mm512_max_epi16(a, b).as_i16x32();
1807     let zero = _mm512_setzero_si512().as_i16x32();
1808     transmute(simd_select_bitmask(k, max, zero))
1809 }
1810 
1811 /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1812 ///
1813 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epi16&expand=3568)
1814 #[inline]
1815 #[target_feature(enable = "avx512bw,avx512vl")]
1816 #[cfg_attr(test, assert_instr(vpmaxsw))]
_mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i1817 pub unsafe fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1818     let max = _mm256_max_epi16(a, b).as_i16x16();
1819     transmute(simd_select_bitmask(k, max, src.as_i16x16()))
1820 }
1821 
1822 /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1823 ///
1824 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epi16&expand=3569)
1825 #[inline]
1826 #[target_feature(enable = "avx512bw,avx512vl")]
1827 #[cfg_attr(test, assert_instr(vpmaxsw))]
_mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i1828 pub unsafe fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1829     let max = _mm256_max_epi16(a, b).as_i16x16();
1830     let zero = _mm256_setzero_si256().as_i16x16();
1831     transmute(simd_select_bitmask(k, max, zero))
1832 }
1833 
1834 /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1835 ///
1836 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epi16&expand=3565)
1837 #[inline]
1838 #[target_feature(enable = "avx512bw,avx512vl")]
1839 #[cfg_attr(test, assert_instr(vpmaxsw))]
_mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i1840 pub unsafe fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1841     let max = _mm_max_epi16(a, b).as_i16x8();
1842     transmute(simd_select_bitmask(k, max, src.as_i16x8()))
1843 }
1844 
1845 /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1846 ///
1847 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epi16&expand=3566)
1848 #[inline]
1849 #[target_feature(enable = "avx512bw,avx512vl")]
1850 #[cfg_attr(test, assert_instr(vpmaxsw))]
_mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i1851 pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1852     let max = _mm_max_epi16(a, b).as_i16x8();
1853     let zero = _mm_setzero_si128().as_i16x8();
1854     transmute(simd_select_bitmask(k, max, zero))
1855 }
1856 
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi8&expand=3600)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxsb(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi8&expand=3598)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, max, src.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi8&expand=3599)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epi8&expand=3595)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, max, src.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epi8&expand=3596)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let max = _mm256_max_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epi8&expand=3592)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, max, src.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epi8&expand=3593)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let max = _mm_max_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu16&expand=3723)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminuw(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu16&expand=3721)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, min, src.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu16&expand=3722)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu16(a, b).as_u16x32();
    let zero = _mm512_setzero_si512().as_u16x32();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epu16&expand=3718)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, min, src.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epu16&expand=3719)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epu16(a, b).as_u16x16();
    let zero = _mm256_setzero_si256().as_u16x16();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epu16&expand=3715)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, min, src.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epu16&expand=3716)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuw))]
pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epu16(a, b).as_u16x8();
    let zero = _mm_setzero_si128().as_u16x8();
    transmute(simd_select_bitmask(k, min, zero))
}

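// Illustrative sketch (my addition, assuming `_mm512_max_epu16` and
// `_mm512_sub_epi16` are the analogous intrinsics defined elsewhere in this
// module): given two `__m512i` vectors `a` and `b`, the lane-wise unsigned
// absolute difference |a - b| falls out of the min/max pair, because
// max(a, b) - min(a, b) can never underflow.
//
// let diff = _mm512_sub_epi16(_mm512_max_epu16(a, b), _mm512_min_epu16(a, b));
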
/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu8&expand=3750)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminub(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu8&expand=3748)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, min, src.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu8&expand=3749)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu8(a, b).as_u8x64();
    let zero = _mm512_setzero_si512().as_u8x64();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epu8&expand=3745)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, min, src.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epu8&expand=3746)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epu8(a, b).as_u8x32();
    let zero = _mm256_setzero_si256().as_u8x32();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epu8&expand=3742)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, min, src.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epu8&expand=3743)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminub))]
pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epu8(a, b).as_u8x16();
    let zero = _mm_setzero_si128().as_u8x16();
    transmute(simd_select_bitmask(k, min, zero))
}

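// Illustrative sketch (my addition, assuming `_mm512_max_epu8` is the
// analogous unsigned-maximum intrinsic defined elsewhere in this module):
// clamping unsigned bytes into `[lo, hi]` composes a min with a max, where
// `lo` and `hi` are the `i8` bit patterns of the unsigned bounds.
//
// let clamped = _mm512_max_epu8(
//     _mm512_min_epu8(x, _mm512_set1_epi8(hi)),
//     _mm512_set1_epi8(lo),
// );
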
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi16&expand=3687)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminsw(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi16&expand=3685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, min, src.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi16&expand=3686)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epi16&expand=3682)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, min, src.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epi16&expand=3683)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epi16&expand=3679)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, min, src.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epi16&expand=3680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi8&expand=3714)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminsb(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi8&expand=3712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, min, src.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi8&expand=3713)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epi8&expand=3709)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, min, src.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epi8&expand=3710)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epi8&expand=3706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, min, src.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epi8&expand=3707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, min, zero))
}

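// Worked note (my addition): the signed and unsigned byte minimums order the
// same bit patterns differently. For a lane holding 0x80 against a lane
// holding 0x01:
//
// * `_mm512_min_epi8` sees -128 vs 1 and keeps 0x80 (signed: -128 < 1);
// * `_mm512_min_epu8` sees 128 vs 1 and keeps 0x01 (unsigned: 1 < 128).
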
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmplt_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmplt_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epu16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmplt_epu16_mask(a, b) & k1
}

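// Illustrative sketch (my addition): the returned `__mmask32` is an ordinary
// integer with one bit per lane, so plain bit arithmetic applies to it.
//
// let k = _mm512_cmplt_epu16_mask(a, b);
// let lanes_below = k.count_ones(); // number of lanes where a < b
// let any_below = k != 0;
// let all_below = k == u32::MAX;
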
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu8_mask&expand=1068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmplt_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epu8_mask&expand=1066)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmplt_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epu8_mask&expand=1064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmplt_epu8_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmplt_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmplt_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmplt_epi16_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmplt_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmplt_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmplt_epi8_mask(a, b) & k1
}

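// Illustrative sketch (my addition, assuming `_mm512_mask_mov_epi8` is the
// masked-move intrinsic defined elsewhere in this module): a compare mask
// feeds directly into any masked operation, giving a branchless per-lane
// "if a < b { x } else { y }".
//
// let k = _mm512_cmplt_epi8_mask(a, b);
// let r = _mm512_mask_mov_epi8(y, k, x); // x where a < b, else y
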
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmpgt_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmpgt_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmpgt_epu16_mask(a, b) & k1
}

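// Illustrative sketch (my addition): compare masks compose with the usual
// bitwise operators, so an exclusive range test "lo < x && x < hi" (with
// `lo` and `hi` as `__m512i` bound vectors) is two compares and an AND.
//
// let in_range = _mm512_cmpgt_epu16_mask(x, lo) & _mm512_cmplt_epu16_mask(x, hi);
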
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmpgt_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmpgt_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmpgt_epu8_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi16_mask&expand=897)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmpgt_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16_mask&expand=895)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmpgt_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16_mask&expand=893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epi16_mask&expand=894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmpgt_epi16_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi8_mask&expand=921)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmpgt_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8_mask&expand=919)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmpgt_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8_mask&expand=917)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epi8_mask&expand=918)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmpgt_epi8_mask(a, b) & k1
}

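// Note (my addition): as the bodies above show, every `_mask_cmp*` variant is
// literally the unmasked compare ANDed with `k1`, so the two spellings below
// are equivalent; the masked form simply documents the intent.
//
// let m1 = _mm512_mask_cmpgt_epi8_mask(k1, a, b);
// let m2 = _mm512_cmpgt_epi8_mask(a, b) & k1;
// debug_assert_eq!(m1, m2);
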
2732 /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
2733 ///
2734 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu16_mask&expand=989)
2735 #[inline]
2736 #[target_feature(enable = "avx512bw")]
2737 #[cfg_attr(test, assert_instr(vpcmp))]
_mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask322738 pub unsafe fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2739     simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32()))
2740 }
2741 
2742 /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2743 ///
2744 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu16_mask&expand=990)
2745 #[inline]
2746 #[target_feature(enable = "avx512bw")]
2747 #[cfg_attr(test, assert_instr(vpcmp))]
_mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask322748 pub unsafe fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2749     _mm512_cmple_epu16_mask(a, b) & k1
2750 }
2751 
2752 /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
2753 ///
2754 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epu16_mask&expand=987)
2755 #[inline]
2756 #[target_feature(enable = "avx512bw,avx512vl")]
2757 #[cfg_attr(test, assert_instr(vpcmp))]
_mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask162758 pub unsafe fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2759     simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16()))
2760 }
2761 
2762 /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2763 ///
2764 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epu16_mask&expand=988)
2765 #[inline]
2766 #[target_feature(enable = "avx512bw,avx512vl")]
2767 #[cfg_attr(test, assert_instr(vpcmp))]
_mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask162768 pub unsafe fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2769     _mm256_cmple_epu16_mask(a, b) & k1
2770 }
2771 
2772 /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
2773 ///
2774 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epu16_mask&expand=985)
2775 #[inline]
2776 #[target_feature(enable = "avx512bw,avx512vl")]
2777 #[cfg_attr(test, assert_instr(vpcmp))]
_mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask82778 pub unsafe fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2779     simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8()))
2780 }
2781 
2782 /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2783 ///
2784 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epu16_mask&expand=986)
2785 #[inline]
2786 #[target_feature(enable = "avx512bw,avx512vl")]
2787 #[cfg_attr(test, assert_instr(vpcmp))]
_mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask82788 pub unsafe fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2789     _mm_cmple_epu16_mask(a, b) & k1
2790 }
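
// Illustrative sketch (hypothetical helper, not part of the test suite): how
// the unmasked and zeromasked forms relate for the unsigned 16-bit <= compare.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn demo_cmple_epu16_zeromask() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    // 1 <= 2 holds in all 32 lanes, so every mask bit is set.
    let full = _mm512_cmple_epu16_mask(a, b);
    debug_assert_eq!(full, u32::MAX);
    // Zeromask: result bits whose `k1` bit is clear are forced to 0.
    let k1: __mmask32 = 0x0000_00FF;
    debug_assert_eq!(_mm512_mask_cmple_epu16_mask(k1, a, b), 0x0000_00FF);
}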

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu8_mask&expand=1007)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu8_mask&expand=1008)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmple_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epu8_mask&expand=1005)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epu8_mask&expand=1006)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmple_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epu8_mask&expand=1003)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epu8_mask&expand=1004)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmple_epu8_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi16_mask&expand=965)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi16_mask&expand=966)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmple_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epi16_mask&expand=963)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epi16_mask&expand=964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmple_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epi16_mask&expand=961)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epi16_mask&expand=962)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmple_epi16_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi8_mask&expand=983)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi8_mask&expand=984)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmple_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epi8_mask&expand=981)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epi8_mask&expand=982)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmple_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epi8_mask&expand=979)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epi8_mask&expand=980)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmple_epi8_mask(a, b) & k1
}
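
// Illustrative sketch (hypothetical helper): the epi8/epu8 pairs differ only
// in how the lane's bit pattern is ordered. 0xFF is -1 when read as signed
// but 255 when read as unsigned.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn demo_cmple_signed_vs_unsigned() {
    let a = _mm_set1_epi8(-1); // every lane holds 0xFF
    let b = _mm_setzero_si128();
    // Signed: -1 <= 0, so all 16 mask bits are set.
    debug_assert_eq!(_mm_cmple_epi8_mask(a, b), 0xFFFF);
    // Unsigned: 255 <= 0 is false, so no mask bits are set.
    debug_assert_eq!(_mm_cmple_epu8_mask(a, b), 0);
}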

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu16_mask&expand=867)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu16_mask&expand=868)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmpge_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epu16_mask&expand=865)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epu16_mask&expand=866)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmpge_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epu16_mask&expand=863)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epu16_mask&expand=864)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmpge_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu8_mask&expand=885)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu8_mask&expand=886)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmpge_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epu8_mask&expand=883)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epu8_mask&expand=884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmpge_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epu8_mask&expand=881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epu8_mask&expand=882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmpge_epu8_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi16_mask&expand=843)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi16_mask&expand=844)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmpge_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epi16_mask&expand=841)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epi16_mask&expand=842)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmpge_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epi16_mask&expand=839)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epi16_mask&expand=840)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmpge_epi16_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi8_mask&expand=861)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi8_mask&expand=862)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmpge_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epi8_mask&expand=859)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epi8_mask&expand=860)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmpge_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epi8_mask&expand=857)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epi8_mask&expand=858)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmpge_epi8_mask(a, b) & k1
}
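
// Illustrative sketch (hypothetical helper): greater-than-or-equal is the
// mirror image of less-than-or-equal with the operands swapped.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn demo_cmpge_is_swapped_cmple() {
    let a = _mm512_set1_epi16(7);
    let b = _mm512_set1_epi16(3);
    // a >= b lane-wise is exactly b <= a lane-wise.
    debug_assert_eq!(
        _mm512_cmpge_epi16_mask(a, b),
        _mm512_cmple_epi16_mask(b, a)
    );
}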

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu16_mask&expand=801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmpeq_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epu16_mask&expand=799)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmpeq_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epu16_mask&expand=797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epu16_mask&expand=798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmpeq_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu8_mask&expand=819)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmpeq_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epu8_mask&expand=817)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmpeq_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epu8_mask&expand=815)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epu8_mask&expand=816)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmpeq_epu8_mask(a, b) & k1
}
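
// Illustrative sketch (hypothetical helper): a compare mask is an ordinary
// integer, so counting matching lanes is just `count_ones` on the returned
// `__mmask64`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn demo_count_equal_bytes() {
    let a = _mm512_set1_epi8(0x42);
    let b = _mm512_set1_epi8(0x42);
    let eq = _mm512_cmpeq_epu8_mask(a, b);
    // All 64 lanes match here, so all 64 mask bits are set.
    debug_assert_eq!(eq.count_ones(), 64);
}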

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi16_mask&expand=771)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmpeq_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16_mask&expand=769)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmpeq_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16_mask&expand=767)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epi16_mask&expand=768)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmpeq_epi16_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi8_mask&expand=795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmpeq_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8_mask&expand=793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmpeq_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8_mask&expand=791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epi8_mask&expand=792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmpeq_epi8_mask(a, b) & k1
}
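
// Illustrative sketch (hypothetical helper): `trailing_zeros` on an equality
// mask gives the index of the first matching lane (or the mask's bit width
// if nothing matched).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn demo_first_equal_lane() {
    // `_mm_set_epi8` lists lanes from the most significant (e15) down to the
    // least significant (e0), so the 7 below sits in lane 2.
    let a = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0);
    let b = _mm_set1_epi8(7);
    let eq = _mm_cmpeq_epi8_mask(a, b);
    // Only bit 2 of the mask is set, so the first match is lane 2.
    debug_assert_eq!(eq.trailing_zeros(), 2);
}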

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu16_mask&expand=1106)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32()))
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmpneq_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epu16_mask&expand=1104)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16()))
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmpneq_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epu16_mask&expand=1102)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8()))
}

/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmpneq_epu16_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu8_mask&expand=1124)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64()))
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmpneq_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epu8_mask&expand=1122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32()))
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmpneq_epu8_mask(a, b) & k1
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epu8_mask&expand=1120)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16()))
}

/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmpneq_epu8_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi16_mask&expand=1082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32()))
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    _mm512_cmpneq_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epi16_mask&expand=1080)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16()))
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    _mm256_cmpneq_epi16_mask(a, b) & k1
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epi16_mask&expand=1078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8()))
}

/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_cmpneq_epi16_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi8_mask&expand=1100)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64()))
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    _mm512_cmpneq_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epi8_mask&expand=1098)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32()))
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    _mm256_cmpneq_epi8_mask(a, b) & k1
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epi8_mask&expand=1096)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16()))
}

/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    _mm_cmpneq_epi8_mask(a, b) & k1
}
3692 /// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
3693 ///
3694 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu16_mask&expand=715)
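///
/// Illustrative sketch (not part of the original documentation). It assumes
/// the predicate constant `_MM_CMPINT_LT` from the AVX-512F module is in
/// scope and that AVX512BW support has been verified by the caller:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(1);
///     let b = _mm512_set1_epi16(2);
///     // _MM_CMPINT_LT selects unsigned less-than, so all 32 lanes match
///     assert_eq!(_mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b), u32::MAX);
/// }
/// ```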
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    static_assert_imm3!(IMM8);
    let a = a.as_u16x32();
    let b = b.as_u16x32();
    let r = vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
    transmute(r)
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu16_mask&expand=716)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    static_assert_imm3!(IMM8);
    let a = a.as_u16x32();
    let b = b.as_u16x32();
    let r = vpcmpuw(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epu16_mask&expand=713)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    static_assert_imm3!(IMM8);
    let a = a.as_u16x16();
    let b = b.as_u16x16();
    let r = vpcmpuw256(a, b, IMM8, 0b11111111_11111111);
    transmute(r)
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epu16_mask&expand=714)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    static_assert_imm3!(IMM8);
    let a = a.as_u16x16();
    let b = b.as_u16x16();
    let r = vpcmpuw256(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epu16_mask&expand=711)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    static_assert_imm3!(IMM8);
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    let r = vpcmpuw128(a, b, IMM8, 0b11111111);
    transmute(r)
}

/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epu16_mask&expand=712)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    static_assert_imm3!(IMM8);
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    let r = vpcmpuw128(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu8_mask&expand=733)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    static_assert_imm3!(IMM8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let r = vpcmpub(
        a,
        b,
        IMM8,
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    );
    transmute(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu8_mask&expand=734)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    static_assert_imm3!(IMM8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let r = vpcmpub(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epu8_mask&expand=731)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    static_assert_imm3!(IMM8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let r = vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
    transmute(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epu8_mask&expand=732)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    static_assert_imm3!(IMM8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let r = vpcmpub256(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epu8_mask&expand=729)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    static_assert_imm3!(IMM8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let r = vpcmpub128(a, b, IMM8, 0b11111111_11111111);
    transmute(r)
}

/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epu8_mask&expand=730)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    static_assert_imm3!(IMM8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let r = vpcmpub128(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi16_mask&expand=691)
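///
/// Illustrative sketch (not part of the original documentation) showing that
/// this comparison is signed; `_MM_CMPINT_LT` is assumed to come from the
/// AVX-512F module:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(-1);
///     let b = _mm512_set1_epi16(0);
///     // signed less-than: -1 < 0 holds in every lane (unsigned it would not)
///     assert_eq!(_mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b), u32::MAX);
/// }
/// ```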
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    static_assert_imm3!(IMM8);
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    let r = vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
    transmute(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi16_mask&expand=692)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    static_assert_imm3!(IMM8);
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    let r = vpcmpw(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epi16_mask&expand=689)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    static_assert_imm3!(IMM8);
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    let r = vpcmpw256(a, b, IMM8, 0b11111111_11111111);
    transmute(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epi16_mask&expand=690)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    static_assert_imm3!(IMM8);
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    let r = vpcmpw256(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epi16_mask&expand=687)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    static_assert_imm3!(IMM8);
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    let r = vpcmpw128(a, b, IMM8, 0b11111111);
    transmute(r)
}

/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epi16_mask&expand=688)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    static_assert_imm3!(IMM8);
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    let r = vpcmpw128(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi8_mask&expand=709)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    static_assert_imm3!(IMM8);
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    let r = vpcmpb(
        a,
        b,
        IMM8,
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    );
    transmute(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi8_mask&expand=710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    static_assert_imm3!(IMM8);
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    let r = vpcmpb(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epi8_mask&expand=707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    static_assert_imm3!(IMM8);
    let a = a.as_i8x32();
    let b = b.as_i8x32();
    let r = vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
    transmute(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epi8_mask&expand=708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    static_assert_imm3!(IMM8);
    let a = a.as_i8x32();
    let b = b.as_i8x32();
    let r = vpcmpb256(a, b, IMM8, k1);
    transmute(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epi8_mask&expand=705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    static_assert_imm3!(IMM8);
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    let r = vpcmpb128(a, b, IMM8, 0b11111111_11111111);
    transmute(r)
}

/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epi8_mask&expand=706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    static_assert_imm3!(IMM8);
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    let r = vpcmpb128(a, b, IMM8, k1);
    transmute(r)
}

/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_epi16&expand=3368)
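///
/// A minimal usage sketch (illustrative addition, assuming AVX512BW support
/// has been verified by the caller):
///
/// ```ignore
/// unsafe {
///     let data = [7i16; 32];
///     // the source pointer has no alignment requirement
///     let v = _mm512_loadu_epi16(data.as_ptr());
/// }
/// ```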
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_epi16&expand=3365)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_epi16&expand=3362)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_epi8&expand=3395)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_epi8&expand=3392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_epi8&expand=3389)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi16&expand=5622)
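///
/// A minimal usage sketch (illustrative addition, assuming AVX512BW support
/// has been verified by the caller):
///
/// ```ignore
/// unsafe {
///     let v = _mm512_set1_epi16(9);
///     let mut out = [0i16; 32];
///     // the destination pointer has no alignment requirement
///     _mm512_storeu_epi16(out.as_mut_ptr(), v);
///     assert_eq!(out, [9i16; 32]);
/// }
/// ```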
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_epi16&expand=5620)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_epi16&expand=5618)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi8&expand=5640)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_epi8&expand=5638)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_epi8&expand=5636)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_madd_epi16&expand=3511)
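///
/// Illustrative sketch (not from Intel's documentation; assumes AVX512BW
/// support has been verified by the caller):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(2);
///     let b = _mm512_set1_epi16(3);
///     // each 32-bit result lane is 2*3 + 2*3 = 12
///     let r = _mm512_madd_epi16(a, b);
/// }
/// ```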
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_madd_epi16&expand=3512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm512_mask_madd_epi16(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let madd = _mm512_madd_epi16(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_madd_epi16&expand=3513)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let madd = _mm512_madd_epi16(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, madd, zero))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_madd_epi16&expand=3509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let madd = _mm256_madd_epi16(a, b).as_i32x8();
    transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_madd_epi16&expand=3510)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    let madd = _mm256_madd_epi16(a, b).as_i32x8();
    let zero = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, madd, zero))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_madd_epi16&expand=3506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let madd = _mm_madd_epi16(a, b).as_i32x4();
    transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
}

/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_madd_epi16&expand=3507)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
pub unsafe fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let madd = _mm_madd_epi16(a, b).as_i32x4();
    let zero = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, madd, zero))
}

/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maddubs_epi16&expand=3539)
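///
/// Illustrative sketch (not from Intel's documentation; assumes AVX512BW
/// support). Note that the lanes of a are treated as unsigned and the lanes
/// of b as signed:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi8(10); // interpreted as unsigned 10
///     let b = _mm512_set1_epi8(-1); // interpreted as signed -1
///     // each 16-bit result lane is 10*(-1) + 10*(-1) = -20
///     let r = _mm512_maddubs_epi16(a, b);
/// }
/// ```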
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64()))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_maddubs_epi16&expand=3540)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm512_mask_maddubs_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_maddubs_epi16&expand=3541)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, madd, zero))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_maddubs_epi16&expand=3537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm256_mask_maddubs_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_maddubs_epi16&expand=3538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, madd, zero))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_maddubs_epi16&expand=3534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let madd = _mm_maddubs_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
}

/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_maddubs_epi16&expand=3535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub unsafe fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let madd = _mm_maddubs_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, madd, zero))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packs_epi32&expand=4091)
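///
/// Illustrative sketch of the signed saturation (not from Intel's
/// documentation; assumes AVX512BW support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi32(70_000);  // saturates to i16::MAX (32767)
///     let b = _mm512_set1_epi32(-70_000); // saturates to i16::MIN (-32768)
///     let r = _mm512_packs_epi32(a, b);
/// }
/// ```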
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpackssdw(a.as_i32x16(), b.as_i32x16()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packs_epi32&expand=4089)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm512_mask_packs_epi32(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let pack = _mm512_packs_epi32(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packs_epi32&expand=4090)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let pack = _mm512_packs_epi32(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packs_epi32&expand=4086)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm256_mask_packs_epi32(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let pack = _mm256_packs_epi32(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packs_epi32&expand=4087)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let pack = _mm256_packs_epi32(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packs_epi32&expand=4083)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packs_epi32(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packs_epi32&expand=4084)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packs_epi32(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packs_epi16&expand=4082)
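///
/// Illustrative sketch of the signed saturation (not from Intel's
/// documentation; assumes AVX512BW support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi16(300);  // saturates to i8::MAX (127)
///     let b = _mm512_set1_epi16(-300); // saturates to i8::MIN (-128)
///     let r = _mm512_packs_epi16(a, b);
/// }
/// ```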
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpacksswb(a.as_i16x32(), b.as_i16x32()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packs_epi16&expand=4080)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm512_mask_packs_epi16(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let pack = _mm512_packs_epi16(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packs_epi16&expand=4081)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let pack = _mm512_packs_epi16(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packs_epi16&expand=4077)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm256_mask_packs_epi16(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let pack = _mm256_packs_epi16(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packs_epi16&expand=4078)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let pack = _mm256_packs_epi16(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packs_epi16&expand=4074)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packs_epi16(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packs_epi16&expand=4075)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packs_epi16(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packus_epi32&expand=4130)
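///
/// Illustrative sketch of the unsigned saturation (not from Intel's
/// documentation; assumes AVX512BW support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_epi32(-1);     // saturates to 0
///     let b = _mm512_set1_epi32(70_000); // saturates to u16::MAX (65535)
///     let r = _mm512_packus_epi32(a, b);
/// }
/// ```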
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpackusdw(a.as_i32x16(), b.as_i32x16()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packus_epi32&expand=4128)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm512_mask_packus_epi32(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let pack = _mm512_packus_epi32(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packus_epi32&expand=4129)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let pack = _mm512_packus_epi32(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packus_epi32&expand=4125)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm256_mask_packus_epi32(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let pack = _mm256_packus_epi32(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packus_epi32&expand=4126)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let pack = _mm256_packus_epi32(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packus_epi32&expand=4122)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packus_epi32(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
}

/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packus_epi32&expand=4123)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packus_epi32(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packus_epi16&expand=4121)
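///
/// A minimal usage sketch (illustrative values only; marked `ignore` since it needs runtime AVX512BW support):
///
/// ```ignore
/// let a = _mm512_set1_epi16(-5); // negative input saturates to 0
/// let b = _mm512_set1_epi16(300); // input above 255 saturates to 255
/// let r = _mm512_packus_epi16(a, b);
/// ```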
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpackuswb(a.as_i16x32(), b.as_i16x32()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packus_epi16&expand=4119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm512_mask_packus_epi16(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let pack = _mm512_packus_epi16(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packus_epi16&expand=4120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let pack = _mm512_packus_epi16(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packus_epi16&expand=4116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm256_mask_packus_epi16(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let pack = _mm256_packus_epi16(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packus_epi16&expand=4117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let pack = _mm256_packus_epi16(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packus_epi16&expand=4113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packus_epi16(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
}

/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packus_epi16&expand=4114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let pack = _mm_packus_epi16(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, pack, zero))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_avg_epu16&expand=388)
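///
/// The average is computed with rounding, i.e. `(a + b + 1) >> 1` per unsigned 16-bit lane. A minimal sketch (illustrative values only; marked `ignore` since it needs runtime AVX512BW support):
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let b = _mm512_set1_epi16(2);
/// let r = _mm512_avg_epu16(a, b); // every lane is (1 + 2 + 1) >> 1 = 2
/// ```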
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpavgw(a.as_u16x32(), b.as_u16x32()))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_avg_epu16&expand=389)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let avg = _mm512_avg_epu16(a, b).as_u16x32();
    transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_avg_epu16&expand=390)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let avg = _mm512_avg_epu16(a, b).as_u16x32();
    let zero = _mm512_setzero_si512().as_u16x32();
    transmute(simd_select_bitmask(k, avg, zero))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_avg_epu16&expand=386)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let avg = _mm256_avg_epu16(a, b).as_u16x16();
    transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_avg_epu16&expand=387)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let avg = _mm256_avg_epu16(a, b).as_u16x16();
    let zero = _mm256_setzero_si256().as_u16x16();
    transmute(simd_select_bitmask(k, avg, zero))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_avg_epu16&expand=383)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let avg = _mm_avg_epu16(a, b).as_u16x8();
    transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
}

/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_avg_epu16&expand=384)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let avg = _mm_avg_epu16(a, b).as_u16x8();
    let zero = _mm_setzero_si128().as_u16x8();
    transmute(simd_select_bitmask(k, avg, zero))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_avg_epu8&expand=397)
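///
/// As with the 16-bit variant, the average rounds up: `(a + b + 1) >> 1` per unsigned 8-bit lane. A minimal sketch (illustrative values only; marked `ignore` since it needs runtime AVX512BW support):
///
/// ```ignore
/// let a = _mm512_set1_epi8(0);
/// let b = _mm512_set1_epi8(-1); // 255 when reinterpreted as u8
/// let r = _mm512_avg_epu8(a, b); // every lane is (0 + 255 + 1) >> 1 = 128
/// ```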
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpavgb(a.as_u8x64(), b.as_u8x64()))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_avg_epu8&expand=398)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let avg = _mm512_avg_epu8(a, b).as_u8x64();
    transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_avg_epu8&expand=399)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let avg = _mm512_avg_epu8(a, b).as_u8x64();
    let zero = _mm512_setzero_si512().as_u8x64();
    transmute(simd_select_bitmask(k, avg, zero))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_avg_epu8&expand=395)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let avg = _mm256_avg_epu8(a, b).as_u8x32();
    transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_avg_epu8&expand=396)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let avg = _mm256_avg_epu8(a, b).as_u8x32();
    let zero = _mm256_setzero_si256().as_u8x32();
    transmute(simd_select_bitmask(k, avg, zero))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_avg_epu8&expand=392)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let avg = _mm_avg_epu8(a, b).as_u8x16();
    transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
}

/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_avg_epu8&expand=393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let avg = _mm_avg_epu8(a, b).as_u8x16();
    let zero = _mm_setzero_si128().as_u8x16();
    transmute(simd_select_bitmask(k, avg, zero))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi16&expand=5271)
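///
/// The shift amount is taken from the low 64 bits of count and applied to all lanes; amounts above 15 zero the result. A minimal sketch (illustrative values only; marked `ignore` since it needs runtime AVX512BW support):
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let count = _mm_set_epi64x(0, 3); // low 64 bits hold the shift amount
/// let r = _mm512_sll_epi16(a, count); // every lane is 1 << 3 = 8
/// ```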
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsllw(a.as_i16x32(), count.as_i16x8()))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi16&expand=5269)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm512_mask_sll_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_sll_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi16&expand=5270)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_sll_epi16(a, count).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sll_epi16&expand=5266)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm256_mask_sll_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m128i,
) -> __m256i {
    let shf = _mm256_sll_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sll_epi16&expand=5267)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    let shf = _mm256_sll_epi16(a, count).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sll_epi16&expand=5263)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sll_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sll_epi16&expand=5264)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sll_epi16(a, count).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi16&expand=5301)
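///
/// A minimal usage sketch with the shift as a const generic (illustrative values only; marked `ignore` since it needs runtime AVX512BW support):
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let r = _mm512_slli_epi16::<3>(a); // every lane is 1 << 3 = 8
/// ```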
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    static_assert_imm_u8!(IMM8);
    let a = a.as_i16x32();
    let r = vpslliw(a, IMM8);
    transmute(r)
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi16&expand=5299)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_slli_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_imm_u8!(IMM8);
    let a = a.as_i16x32();
    let shf = vpslliw(a, IMM8);
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi16&expand=5300)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_imm_u8!(IMM8);
    let a = a.as_i16x32();
    let shf = vpslliw(a, IMM8);
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_slli_epi16&expand=5296)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_slli_epi16<const IMM8: u32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_imm_u8!(IMM8);
    let imm8 = IMM8 as i32;
    let r = pslliw256(a.as_i16x16(), imm8);
    transmute(simd_select_bitmask(k, r, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_slli_epi16&expand=5297)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_imm_u8!(IMM8);
    let imm8 = IMM8 as i32;
    let r = pslliw256(a.as_i16x16(), imm8);
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_slli_epi16&expand=5293)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_slli_epi16<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_imm_u8!(IMM8);
    let imm8 = IMM8 as i32;
    let r = pslliw128(a.as_i16x8(), imm8);
    transmute(simd_select_bitmask(k, r, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_slli_epi16&expand=5294)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_imm_u8!(IMM8);
    let imm8 = IMM8 as i32;
    let r = pslliw128(a.as_i16x8(), imm8);
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi16&expand=5333)
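///
/// Unlike `_mm512_sll_epi16`, each lane is shifted by its own amount from the corresponding lane of count. A minimal sketch (illustrative values only; marked `ignore` since it needs runtime AVX512BW support):
///
/// ```ignore
/// let a = _mm512_set1_epi16(1);
/// let count = _mm512_set1_epi16(4); // per-lane shift amounts
/// let r = _mm512_sllv_epi16(a, count); // every lane is 1 << 4 = 16
/// ```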
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsllvw(a.as_i16x32(), count.as_i16x32()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi16&expand=5331)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm512_mask_sllv_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_sllv_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi16&expand=5332)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_sllv_epi16(a, count).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi16&expand=5330)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sllv_epi16&expand=5328)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm256_mask_sllv_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    let shf = _mm256_sllv_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sllv_epi16&expand=5329)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    let shf = _mm256_sllv_epi16(a, count).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi16&expand=5327)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sllv_epi16&expand=5325)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm_mask_sllv_epi16(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    count: __m128i,
) -> __m128i {
    let shf = _mm_sllv_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sllv_epi16&expand=5326)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub unsafe fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sllv_epi16(a, count).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi16&expand=5483)
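///
/// A minimal usage sketch (illustrative values only; marked `ignore` since it needs runtime AVX512BW support):
///
/// ```ignore
/// let a = _mm512_set1_epi16(-1); // 0xFFFF in every lane
/// let count = _mm_set_epi64x(0, 8); // low 64 bits hold the shift amount
/// let r = _mm512_srl_epi16(a, count); // every lane is 0x00FF: zeros are shifted in
/// ```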
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsrlw(a.as_i16x32(), count.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi16&expand=5481)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm512_mask_srl_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_srl_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi16&expand=5482)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_srl_epi16(a, count).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srl_epi16&expand=5478)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm256_mask_srl_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m128i,
) -> __m256i {
    let shf = _mm256_srl_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srl_epi16&expand=5479)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    let shf = _mm256_srl_epi16(a, count).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srl_epi16&expand=5475)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_srl_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srl_epi16&expand=5476)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlw))]
pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_srl_epi16(a, count).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi16&expand=5513)
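///
/// A minimal usage sketch with the shift as a const generic (illustrative values only; marked `ignore` since it needs runtime AVX512BW support):
///
/// ```ignore
/// let a = _mm512_set1_epi16(0x00F0);
/// let r = _mm512_srli_epi16::<4>(a); // every lane is 0x000F
/// ```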
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    static_assert_imm_u8!(IMM8);
    let a = a.as_i16x32();
    let r = vpsrliw(a, IMM8);
    transmute(r)
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi16&expand=5511)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_srli_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_imm_u8!(IMM8);
    let a = a.as_i16x32();
    let shf = vpsrliw(a, IMM8);
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi16&expand=5512)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    // IMM8 should logically be u32 (as in the unmasked variant), but it is declared
    // i32 here to match Intel's documentation, which appears to be inconsistent;
    // cast for the underlying intrinsic call.
    let a = a.as_i16x32();
    let shf = vpsrliw(a, IMM8 as u32);
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srli_epi16&expand=5508)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_srli_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_imm8!(IMM8);
    let shf = _mm256_srli_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srli_epi16&expand=5509)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_imm8!(IMM8);
    let shf = _mm256_srli_epi16::<IMM8>(a);
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf.as_i16x16(), zero))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srli_epi16&expand=5505)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_srli_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_imm8!(IMM8);
    let shf = _mm_srli_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srli_epi16&expand=5506)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let shf = _mm_srli_epi16::<IMM8>(a);
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf.as_i16x8(), zero))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi16&expand=5545)
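///
/// Each lane is shifted logically by the corresponding lane of count. A minimal sketch (illustrative values only; marked `ignore` since it needs runtime AVX512BW support):
///
/// ```ignore
/// let a = _mm512_set1_epi16(16);
/// let count = _mm512_set1_epi16(4); // per-lane shift amounts
/// let r = _mm512_srlv_epi16(a, count); // every lane is 16 >> 4 = 1
/// ```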
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi16&expand=5543)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm512_mask_srlv_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_srlv_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi16&expand=5544)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_srlv_epi16(a, count).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi16&expand=5542)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srlv_epi16&expand=5540)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm256_mask_srlv_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    let shf = _mm256_srlv_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srlv_epi16&expand=5541)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    let shf = _mm256_srlv_epi16(a, count).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi16&expand=5539)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srlv_epi16&expand=5537)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm_mask_srlv_epi16(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    count: __m128i,
) -> __m128i {
    let shf = _mm_srlv_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srlv_epi16&expand=5538)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub unsafe fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_srlv_epi16(a, count).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi16&expand=5398)
5549 #[inline]
5550 #[target_feature(enable = "avx512bw")]
5551 #[cfg_attr(test, assert_instr(vpsraw))]
_mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i5552 pub unsafe fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
5553     transmute(vpsraw(a.as_i16x32(), count.as_i16x8()))
5554 }
5555 
5556 /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5557 ///
5558 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi16&expand=5396)
5559 #[inline]
5560 #[target_feature(enable = "avx512bw")]
5561 #[cfg_attr(test, assert_instr(vpsraw))]
_mm512_mask_sra_epi16( src: __m512i, k: __mmask32, a: __m512i, count: __m128i, ) -> __m512i5562 pub unsafe fn _mm512_mask_sra_epi16(
5563     src: __m512i,
5564     k: __mmask32,
5565     a: __m512i,
5566     count: __m128i,
5567 ) -> __m512i {
5568     let shf = _mm512_sra_epi16(a, count).as_i16x32();
5569     transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5570 }
5571 
5572 /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5573 ///
5574 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi16&expand=5397)
5575 #[inline]
5576 #[target_feature(enable = "avx512bw")]
5577 #[cfg_attr(test, assert_instr(vpsraw))]
_mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i5578 pub unsafe fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
5579     let shf = _mm512_sra_epi16(a, count).as_i16x32();
5580     let zero = _mm512_setzero_si512().as_i16x32();
5581     transmute(simd_select_bitmask(k, shf, zero))
5582 }
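
// Illustrative sketch (a hypothetical test-only helper, not part of the
// upstream source): `_mm512_sra_epi16` applies one shift count, read from the
// low 64 bits of `count`, to every element, replicating the sign bit; a count
// above 15 leaves each element as pure sign fill (0 or -1).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_sra_epi16_sketch() {
    let a = _mm512_set1_epi16(-8);
    let r: [i16; 32] = transmute(_mm512_sra_epi16(a, _mm_set_epi64x(0, 2)));
    assert_eq!(r[0], -2); // -8 >> 2, arithmetic
    let r: [i16; 32] = transmute(_mm512_sra_epi16(a, _mm_set_epi64x(0, 99)));
    assert_eq!(r[0], -1); // oversized count saturates to the sign bit
}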

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sra_epi16&expand=5393)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm256_mask_sra_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m128i,
) -> __m256i {
    let shf = _mm256_sra_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sra_epi16&expand=5394)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
    let shf = _mm256_sra_epi16(a, count).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sra_epi16&expand=5390)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sra_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sra_epi16&expand=5391)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraw))]
pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_sra_epi16(a, count).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi16&expand=5427)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
    static_assert_imm_u8!(IMM8);
    let a = a.as_i16x32();
    let r = vpsraiw(a, IMM8);
    transmute(r)
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi16&expand=5425)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_srai_epi16<const IMM8: u32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_imm_u8!(IMM8);
    let a = a.as_i16x32();
    let shf = vpsraiw(a, IMM8);
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi16&expand=5426)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_imm_u8!(IMM8);
    let a = a.as_i16x32();
    let shf = vpsraiw(a, IMM8);
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}
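
// Illustrative sketch (a hypothetical test-only helper, not part of the
// upstream source): the shift amount of the `srai` family is a const generic
// supplied via turbofish; the writemask form keeps unselected elements from
// `src` instead of zeroing them.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_srai_epi16_sketch() {
    let a = _mm512_set1_epi16(-32);
    let r: [i16; 32] = transmute(_mm512_srai_epi16::<4>(a));
    assert_eq!(r[0], -2); // -32 >> 4, sign preserved
    let src = _mm512_set1_epi16(7);
    let rm: [i16; 32] = transmute(_mm512_mask_srai_epi16::<4>(src, 0b01, a));
    assert_eq!(rm[0], -2); // mask bit 0 set: shifted value
    assert_eq!(rm[1], 7); // mask bit 1 clear: copied from src
}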

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srai_epi16&expand=5422)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_srai_epi16<const IMM8: u32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_imm_u8!(IMM8);
    let imm8 = IMM8 as i32;
    let r = psraiw256(a.as_i16x16(), imm8);
    transmute(simd_select_bitmask(k, r, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srai_epi16&expand=5423)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_imm_u8!(IMM8);
    let imm8 = IMM8 as i32;
    let r = psraiw256(a.as_i16x16(), imm8);
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srai_epi16&expand=5419)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_srai_epi16<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_imm_u8!(IMM8);
    let imm8 = IMM8 as i32;
    let r = psraiw128(a.as_i16x8(), imm8);
    transmute(simd_select_bitmask(k, r, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srai_epi16&expand=5420)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_imm_u8!(IMM8);
    let imm8 = IMM8 as i32;
    let r = psraiw128(a.as_i16x8(), imm8);
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi16&expand=5456)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsravw(a.as_i16x32(), count.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi16&expand=5454)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm512_mask_srav_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_srav_epi16(a, count).as_i16x32();
    transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi16&expand=5455)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_srav_epi16(a, count).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, shf, zero))
}
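
// Illustrative sketch (a hypothetical test-only helper, not part of the
// upstream source): unlike `sra`/`srai`, `_mm512_srav_epi16` reads a separate
// count for every element, still shifting in sign bits.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_srav_epi16_sketch() {
    let a = _mm512_set1_epi16(-64);
    let mut counts = [0i16; 32];
    counts[1] = 3;
    counts[2] = 6;
    let r: [i16; 32] = transmute(_mm512_srav_epi16(a, transmute(counts)));
    assert_eq!(r[0], -64); // count 0: unchanged
    assert_eq!(r[1], -8); // -64 >> 3
    assert_eq!(r[2], -1); // -64 >> 6
}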

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi16&expand=5453)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    transmute(vpsravw256(a.as_i16x16(), count.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srav_epi16&expand=5451)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm256_mask_srav_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    let shf = _mm256_srav_epi16(a, count).as_i16x16();
    transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srav_epi16&expand=5452)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
    let shf = _mm256_srav_epi16(a, count).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi16&expand=5450)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(vpsravw128(a.as_i16x8(), count.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srav_epi16&expand=5448)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm_mask_srav_epi16(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    count: __m128i,
) -> __m128i {
    let shf = _mm_srav_epi16(a, count).as_i16x8();
    transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
}

/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srav_epi16&expand=5449)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub unsafe fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    let shf = _mm_srav_epi16(a, count).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shf, zero))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_epi16&expand=4226)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex2var_epi16&expand=4223)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub unsafe fn _mm512_mask_permutex2var_epi16(
    a: __m512i,
    k: __mmask32,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
    transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex2var_epi16&expand=4225)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm512_maskz_permutex2var_epi16(
    k: __mmask32,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2_permutex2var_epi16&expand=4224)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub unsafe fn _mm512_mask2_permutex2var_epi16(
    a: __m512i,
    idx: __m512i,
    k: __mmask32,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
    transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
}
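
// Illustrative sketch (a hypothetical test-only helper, not part of the
// upstream source): for the 512-bit word permute, each index selects from the
// 64-element concatenation of the two sources: values 0..=31 pick from `a`,
// 32..=63 pick the corresponding element of `b`. The `mask2` variant keeps
// the index value itself where the mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_permutex2var_epi16_sketch() {
    let a = _mm512_set1_epi16(100);
    let b = _mm512_set1_epi16(200);
    let mut idx = [0i16; 32];
    idx[1] = 32; // result element 1 comes from b[0]
    let r: [i16; 32] = transmute(_mm512_permutex2var_epi16(a, transmute(idx), b));
    assert_eq!(r[0], 100); // index 0 -> a[0]
    assert_eq!(r[1], 200); // index 32 -> b[0]
    let r2: [i16; 32] = transmute(_mm512_mask2_permutex2var_epi16(a, transmute(idx), 0b01, b));
    assert_eq!(r2[1], 32); // mask bit clear: the raw index is kept
}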

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_epi16&expand=4222)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_permutex2var_epi16&expand=4219)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub unsafe fn _mm256_mask_permutex2var_epi16(
    a: __m256i,
    k: __mmask16,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
    transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_permutex2var_epi16&expand=4221)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm256_maskz_permutex2var_epi16(
    k: __mmask16,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask2_permutex2var_epi16&expand=4220)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub unsafe fn _mm256_mask2_permutex2var_epi16(
    a: __m256i,
    idx: __m256i,
    k: __mmask16,
    b: __m256i,
) -> __m256i {
    let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
    transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_epi16&expand=4218)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_permutex2var_epi16&expand=4215)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2w))]
pub unsafe fn _mm_mask_permutex2var_epi16(
    a: __m128i,
    k: __mmask8,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
    transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_permutex2var_epi16&expand=4217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
pub unsafe fn _mm_maskz_permutex2var_epi16(
    k: __mmask8,
    a: __m128i,
    idx: __m128i,
    b: __m128i,
) -> __m128i {
    let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask2_permutex2var_epi16&expand=4216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2w))]
pub unsafe fn _mm_mask2_permutex2var_epi16(
    a: __m128i,
    idx: __m128i,
    k: __mmask8,
    b: __m128i,
) -> __m128i {
    let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
    transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_epi16&expand=4295)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermw(a.as_i16x32(), idx.as_i16x32()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_epi16&expand=4293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm512_mask_permutexvar_epi16(
    src: __m512i,
    k: __mmask32,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
    transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutexvar_epi16&expand=4294)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
    let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, permute, zero))
}
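
// Illustrative sketch (a hypothetical test-only helper, not part of the
// upstream source): `_mm512_permutexvar_epi16` is the single-source form,
// where result element i is `a[idx[i] % 32]` and the permutation may cross
// 128-bit lane boundaries.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_permutexvar_epi16_sketch() {
    let mut src = [0i16; 32];
    for (i, v) in src.iter_mut().enumerate() {
        *v = i as i16;
    }
    let idx = _mm512_set1_epi16(31); // broadcast the last element everywhere
    let r: [i16; 32] = transmute(_mm512_permutexvar_epi16(idx, transmute(src)));
    assert_eq!(r, [31i16; 32]);
}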

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_epi16&expand=4292)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
    transmute(vpermw256(a.as_i16x16(), idx.as_i16x16()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_permutexvar_epi16&expand=4290)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm256_mask_permutexvar_epi16(
    src: __m256i,
    k: __mmask16,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
    transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_permutexvar_epi16&expand=4291)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
    let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_epi16&expand=4289)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
    transmute(vpermw128(a.as_i16x8(), idx.as_i16x8()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_permutexvar_epi16&expand=4287)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm_mask_permutexvar_epi16(
    src: __m128i,
    k: __mmask8,
    idx: __m128i,
    a: __m128i,
) -> __m128i {
    let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
    transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
}

/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_permutexvar_epi16&expand=4288)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpermw))]
pub unsafe fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
    let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, permute, zero))
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi16&expand=430)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub unsafe fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32()))
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_epi16&expand=429)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub unsafe fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16()))
}

/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_epi16&expand=427)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
pub unsafe fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8()))
}
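
// Illustrative sketch (a hypothetical test-only helper, not part of the
// upstream source): blend takes `b` where the mask bit is 1 and `a` where it
// is 0, with bit i of `k` controlling element i.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mask_blend_epi16_sketch() {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(2);
    let r: [i16; 8] = transmute(_mm_mask_blend_epi16(0b1111_0000, a, b));
    assert_eq!(r, [1, 1, 1, 1, 2, 2, 2, 2]);
}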

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi8&expand=441)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub unsafe fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64()))
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_epi8&expand=440)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub unsafe fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32()))
}

/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_epi8&expand=439)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
pub unsafe fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16()))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastw_epi16&expand=587)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
    let a = _mm512_castsi128_si512(a).as_i16x32();
    let ret: i16x32 = simd_shuffle32!(
        a,
        a,
        [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0,
        ],
    );
    transmute(ret)
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastw_epi16&expand=588)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastw_epi16&expand=589)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_broadcastw_epi16&expand=585)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_broadcastw_epi16&expand=586)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
    let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_broadcastw_epi16&expand=582)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_broadcastw_epi16&expand=583)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
}
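
// Illustrative sketch (a hypothetical test-only helper, not part of the
// upstream source): broadcast replicates the lowest word of `a`; under a
// writemask, elements with a clear mask bit keep their value from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_broadcastw_epi16_sketch() {
    let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 42); // element 0 is 42
    let r: [i16; 32] = transmute(_mm512_broadcastw_epi16(a));
    assert_eq!(r, [42i16; 32]);
    let src = _mm512_set1_epi16(-1);
    let rm: [i16; 32] = transmute(_mm512_mask_broadcastw_epi16(src, 0b10, a));
    assert_eq!(rm[0], -1); // mask bit 0 clear: kept from src
    assert_eq!(rm[1], 42); // mask bit 1 set: broadcast value
}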

/// Broadcast the low packed 8-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastb_epi8&expand=536)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
    let a = _mm512_castsi128_si512(a).as_i8x64();
    let ret: i8x64 = simd_shuffle64!(
        a,
        a,
        [
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
        ],
    );
    transmute(ret)
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastb_epi8&expand=537)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastb_epi8&expand=538)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_broadcastb_epi8&expand=534)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
    let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_broadcastb_epi8&expand=535)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
    let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_broadcastb_epi8&expand=531)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
}

/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_broadcastb_epi8&expand=532)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
    let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
}
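
// Illustrative sketch (a hypothetical test-only helper, not part of the
// upstream source): the byte broadcast mirrors the word form, but the 512-bit
// vector holds 64 elements, so the zeromask is a full `__mmask64`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_maskz_broadcastb_epi8_sketch() {
    let a = _mm_set1_epi8(9);
    let r: [i8; 64] = transmute(_mm512_maskz_broadcastb_epi8(!0, a));
    assert_eq!(r, [9i8; 64]);
    let rz: [i8; 64] = transmute(_mm512_maskz_broadcastb_epi8(1, a));
    assert_eq!(rz[0], 9); // mask bit 0 set: broadcast value
    assert_eq!(rz[1], 0); // zeromask: cleared, not copied
}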

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi16&expand=6012)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    #[rustfmt::skip]
    let r: i16x32 = simd_shuffle32!(
        a,
        b,
        [
            4, 32 + 4, 5, 32 + 5,
            6, 32 + 6, 7, 32 + 7,
            12, 32 + 12, 13, 32 + 13,
            14, 32 + 14, 15, 32 + 15,
            20, 32 + 20, 21, 32 + 21,
            22, 32 + 22, 23, 32 + 23,
            28, 32 + 28, 29, 32 + 29,
            30, 32 + 30, 31, 32 + 31,
        ],
    );
    transmute(r)
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi16&expand=6010)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm512_mask_unpackhi_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
}

/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi16&expand=6011)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
pub unsafe fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, unpackhi, zero))
}
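
// Illustrative sketch (a hypothetical test-only helper, not part of the
// upstream source): the interleave works per 128-bit lane, so lane 0 of the
// result is [a4, b4, a5, b5, a6, b6, a7, b7] and no element crosses a lane
// boundary.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_unpackhi_epi16_sketch() {
    let mut a = [0i16; 32];
    let mut b = [0i16; 32];
    for i in 0..32 {
        a[i] = i as i16; // 0, 1, 2, ...
        b[i] = 100 + i as i16; // 100, 101, 102, ...
    }
    let r: [i16; 32] = transmute(_mm512_unpackhi_epi16(transmute(a), transmute(b)));
    assert_eq!(r[0], 4); // lane 0 high half starts at a[4]
    assert_eq!(r[1], 104); // ... interleaved with b[4]
    assert_eq!(r[8], 12); // lane 1 restarts at a[12], not a[8]
}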
6444 
6445 /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6446 ///
6447 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpackhi_epi16&expand=6007)
6448 #[inline]
6449 #[target_feature(enable = "avx512bw,avx512vl")]
6450 #[cfg_attr(test, assert_instr(vpunpckhwd))]
_mm256_mask_unpackhi_epi16( src: __m256i, k: __mmask16, a: __m256i, b: __m256i, ) -> __m256i6451 pub unsafe fn _mm256_mask_unpackhi_epi16(
6452     src: __m256i,
6453     k: __mmask16,
6454     a: __m256i,
6455     b: __m256i,
6456 ) -> __m256i {
6457     let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
6458     transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
6459 }
6460 
6461 /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6462 ///
6463 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpackhi_epi16&expand=6008)
6464 #[inline]
6465 #[target_feature(enable = "avx512bw,avx512vl")]
6466 #[cfg_attr(test, assert_instr(vpunpckhwd))]
_mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i6467 pub unsafe fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6468     let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
6469     let zero = _mm256_setzero_si256().as_i16x16();
6470     transmute(simd_select_bitmask(k, unpackhi, zero))
6471 }
6472 
6473 /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6474 ///
6475 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpackhi_epi16&expand=6004)
6476 #[inline]
6477 #[target_feature(enable = "avx512bw,avx512vl")]
6478 #[cfg_attr(test, assert_instr(vpunpckhwd))]
_mm_mask_unpackhi_epi16( src: __m128i, k: __mmask8, a: __m128i, b: __m128i, ) -> __m128i6479 pub unsafe fn _mm_mask_unpackhi_epi16(
6480     src: __m128i,
6481     k: __mmask8,
6482     a: __m128i,
6483     b: __m128i,
6484 ) -> __m128i {
6485     let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
6486     transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
6487 }
6488 
6489 /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6490 ///
6491 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpackhi_epi16&expand=6005)
6492 #[inline]
6493 #[target_feature(enable = "avx512bw,avx512vl")]
6494 #[cfg_attr(test, assert_instr(vpunpckhwd))]
_mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i6495 pub unsafe fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6496     let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
6497     let zero = _mm_setzero_si128().as_i16x8();
6498     transmute(simd_select_bitmask(k, unpackhi, zero))
6499 }
6500 
6501 /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
6502 ///
6503 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi8&expand=6039)
6504 #[inline]
6505 #[target_feature(enable = "avx512bw")]
6506 #[cfg_attr(test, assert_instr(vpunpckhbw))]
_mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i6507 pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
6508     let a = a.as_i8x64();
6509     let b = b.as_i8x64();
6510     #[rustfmt::skip]
6511     let r: i8x64 = simd_shuffle64!(
6512         a,
6513         b,
6514         [
6515             8,  64+8,   9, 64+9,
6516             10, 64+10, 11, 64+11,
6517             12, 64+12, 13, 64+13,
6518             14, 64+14, 15, 64+15,
6519             24, 64+24, 25, 64+25,
6520             26, 64+26, 27, 64+27,
6521             28, 64+28, 29, 64+29,
6522             30, 64+30, 31, 64+31,
6523             40, 64+40, 41, 64+41,
6524             42, 64+42, 43, 64+43,
6525             44, 64+44, 45, 64+45,
6526             46, 64+46, 47, 64+47,
6527             56, 64+56, 57, 64+57,
6528             58, 64+58, 59, 64+59,
6529             60, 64+60, 61, 64+61,
6530             62, 64+62, 63, 64+63,
6531         ],
6532     );
6533     transmute(r)
6534 }
6535 
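// Illustrative sketch (not part of the upstream source): within each 128-bit
// lane, the high eight bytes of `a` and `b` are interleaved, so lane 0 of the
// result is a[8], b[8], a[9], b[9], ..., a[15], b[15]; in the index array
// above, `64 + i` selects byte `i` of `b`. For example:
//
//     let a = _mm512_set1_epi8(1);
//     let b = _mm512_set1_epi8(2);
//     // Result bytes alternate 1, 2, 1, 2, ...
//     let r = _mm512_unpackhi_epi8(a, b);
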
/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi8&expand=6037)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm512_mask_unpackhi_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi8&expand=6038)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, unpackhi, zero))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpackhi_epi8&expand=6034)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm256_mask_unpackhi_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpackhi_epi8&expand=6035)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, unpackhi, zero))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpackhi_epi8&expand=6031)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm_mask_unpackhi_epi8(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
}

/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpackhi_epi8&expand=6032)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
pub unsafe fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, unpackhi, zero))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi16&expand=6069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    #[rustfmt::skip]
    let r: i16x32 = simd_shuffle32!(
        a,
        b,
        [
            0,  32+0,   1, 32+1,
            2,  32+2,   3, 32+3,
            8,  32+8,   9, 32+9,
            10, 32+10, 11, 32+11,
            16, 32+16, 17, 32+17,
            18, 32+18, 19, 32+19,
            24, 32+24, 25, 32+25,
            26, 32+26, 27, 32+27
        ],
    );
    transmute(r)
}

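// Illustrative sketch (not part of the upstream source): within each 128-bit
// lane, the low four words of `a` and `b` are interleaved, so lane 0 of the
// result is a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]; in the index
// array above, `32 + i` selects word `i` of `b`. For example:
//
//     let a = _mm512_set1_epi16(1);
//     let b = _mm512_set1_epi16(2);
//     // Result words alternate 1, 2, 1, 2, ...
//     let r = _mm512_unpacklo_epi16(a, b);
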
/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi16&expand=6067)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm512_mask_unpacklo_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi16&expand=6068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpacklo_epi16&expand=6064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm256_mask_unpacklo_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpacklo_epi16&expand=6065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpacklo_epi16&expand=6061)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm_mask_unpacklo_epi16(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
}

/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpacklo_epi16&expand=6062)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
pub unsafe fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi8&expand=6096)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    #[rustfmt::skip]
    let r: i8x64 = simd_shuffle64!(
        a,
        b,
        [
            0,  64+0,   1, 64+1,
            2,  64+2,   3, 64+3,
            4,  64+4,   5, 64+5,
            6,  64+6,   7, 64+7,
            16, 64+16, 17, 64+17,
            18, 64+18, 19, 64+19,
            20, 64+20, 21, 64+21,
            22, 64+22, 23, 64+23,
            32, 64+32, 33, 64+33,
            34, 64+34, 35, 64+35,
            36, 64+36, 37, 64+37,
            38, 64+38, 39, 64+39,
            48, 64+48, 49, 64+49,
            50, 64+50, 51, 64+51,
            52, 64+52, 53, 64+53,
            54, 64+54, 55, 64+55,
        ],
    );
    transmute(r)
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi8&expand=6094)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm512_mask_unpacklo_epi8(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi8&expand=6095)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpacklo_epi8&expand=6091)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm256_mask_unpacklo_epi8(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpacklo_epi8&expand=6092)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpacklo_epi8&expand=6088)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm_mask_unpacklo_epi8(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
}

/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpacklo_epi8&expand=6089)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
pub unsafe fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi16&expand=3795)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    let mov = a.as_i16x32();
    transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi16&expand=3796)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
    let mov = a.as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, mov, zero))
}

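// Illustrative sketch (not part of the upstream source): the masked `mov`
// intrinsics perform no arithmetic at all -- they are pure mask-blends.
//
//     let a = _mm512_set1_epi16(5);
//     let src = _mm512_set1_epi16(9);
//     let r1 = _mm512_mask_mov_epi16(src, 0, a); // no mask bits set: r1 == src
//     let r2 = _mm512_maskz_mov_epi16(0, a);     // no mask bits set: r2 == zero
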
/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mov_epi16&expand=3793)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    let mov = a.as_i16x16();
    transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mov_epi16&expand=3794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
    let mov = a.as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mov_epi16&expand=3791)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let mov = a.as_i16x8();
    transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
}

/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mov_epi16&expand=3792)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu16))]
pub unsafe fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let mov = a.as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi8&expand=3813)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    let mov = a.as_i8x64();
    transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi8&expand=3814)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
    let mov = a.as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mov_epi8&expand=3811)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    let mov = a.as_i8x32();
    transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mov_epi8&expand=3812)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
    let mov = a.as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mov_epi8&expand=3809)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    let mov = a.as_i8x16();
    transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
}

/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mov_epi8&expand=3810)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu8))]
pub unsafe fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
    let mov = a.as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi16&expand=4942)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
    let r = _mm512_set1_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, r, src.as_i16x32()))
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi16&expand=4943)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
    let r = _mm512_set1_epi16(a).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, r, zero))
}

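// Illustrative sketch (not part of the upstream source): masked broadcast
// splats the scalar, then blends against `src` (or zero) like every other
// masked intrinsic in this file.
//
//     let src = _mm512_set1_epi16(7);
//     // Lanes 0..=15 become 42; lanes 16..=31 keep 7.
//     let r = _mm512_mask_set1_epi16(src, 0x0000_FFFF, 42);
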
/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_set1_epi16&expand=4939)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
    let r = _mm256_set1_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, r, src.as_i16x16()))
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_set1_epi16&expand=4940)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
    let r = _mm256_set1_epi16(a).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_set1_epi16&expand=4936)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
    let r = _mm_set1_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, r, src.as_i16x8()))
}

/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_set1_epi16&expand=4937)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
pub unsafe fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
    let r = _mm_set1_epi16(a).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi8&expand=4970)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
    let r = _mm512_set1_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, r, src.as_i8x64()))
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi8&expand=4971)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
    let r = _mm512_set1_epi8(a).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_set1_epi8&expand=4967)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
    let r = _mm256_set1_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, r, src.as_i8x32()))
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_set1_epi8&expand=4968)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
    let r = _mm256_set1_epi8(a).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_set1_epi8&expand=4964)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
    let r = _mm_set1_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, r, src.as_i8x16()))
}

/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_set1_epi8&expand=4965)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
    let r = _mm_set1_epi8(a).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x32();
    let r: i16x32 = simd_shuffle32!(
        a,
        a,
        <const IMM8: i32> [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            4,
            5,
            6,
            7,
            (IMM8 as u32 & 0b11) + 8,
            ((IMM8 as u32 >> 2) & 0b11) + 8,
            ((IMM8 as u32 >> 4) & 0b11) + 8,
            ((IMM8 as u32 >> 6) & 0b11) + 8,
            12,
            13,
            14,
            15,
            (IMM8 as u32 & 0b11) + 16,
            ((IMM8 as u32 >> 2) & 0b11) + 16,
            ((IMM8 as u32 >> 4) & 0b11) + 16,
            ((IMM8 as u32 >> 6) & 0b11) + 16,
            20,
            21,
            22,
            23,
            (IMM8 as u32 & 0b11) + 24,
            ((IMM8 as u32 >> 2) & 0b11) + 24,
            ((IMM8 as u32 >> 4) & 0b11) + 24,
            ((IMM8 as u32 >> 6) & 0b11) + 24,
            28,
            29,
            30,
            31,
        ],
    );
    transmute(r)
}

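// Illustrative sketch (not part of the upstream source): bits 2j..2j+2 of
// IMM8 pick which of a lane's four low words lands in low word j, exactly
// as the index arithmetic above spells out per 128-bit lane.
//
//     // Reverse the four low words of every 128-bit lane; the high four
//     // words are passed through unchanged:
//     let r = _mm512_shufflelo_epi16::<0b00_01_10_11>(a);
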
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflelo_epi16&expand=5219)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_imm8!(IMM8);
    let r = _mm512_shufflelo_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflelo_epi16&expand=5220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    let r = _mm512_shufflelo_epi16::<IMM8>(a);
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, r.as_i16x32(), zero))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflelo_epi16&expand=5216)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_imm8!(IMM8);
    let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflelo_epi16&expand=5217)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_imm8!(IMM8);
    let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflelo_epi16&expand=5213)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_imm8!(IMM8);
    let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
}

/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflelo_epi16&expand=5214)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x32();
    let r: i16x32 = simd_shuffle32!(
        a,
        a,
        <const IMM8: i32> [
            0,
            1,
            2,
            3,
            (IMM8 as u32 & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
            8,
            9,
            10,
            11,
            (IMM8 as u32 & 0b11) + 12,
            ((IMM8 as u32 >> 2) & 0b11) + 12,
            ((IMM8 as u32 >> 4) & 0b11) + 12,
            ((IMM8 as u32 >> 6) & 0b11) + 12,
            16,
            17,
            18,
            19,
            (IMM8 as u32 & 0b11) + 20,
            ((IMM8 as u32 >> 2) & 0b11) + 20,
            ((IMM8 as u32 >> 4) & 0b11) + 20,
            ((IMM8 as u32 >> 6) & 0b11) + 20,
            24,
            25,
            26,
            27,
            (IMM8 as u32 & 0b11) + 28,
            ((IMM8 as u32 >> 2) & 0b11) + 28,
            ((IMM8 as u32 >> 4) & 0b11) + 28,
            ((IMM8 as u32 >> 6) & 0b11) + 28,
        ],
    );
    transmute(r)
}

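// Illustrative sketch (not part of the upstream source): the mirror image of
// shufflelo -- bits 2j..2j+2 of IMM8 pick which of a lane's four high words
// lands in high word j, while the low words pass through.
//
//     // Reverse the four high words of every 128-bit lane:
//     let r = _mm512_shufflehi_epi16::<0b00_01_10_11>(a);
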
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflehi_epi16&expand=5210)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
) -> __m512i {
    static_assert_imm8!(IMM8);
    let r = _mm512_shufflehi_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflehi_epi16&expand=5211)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    let r = _mm512_shufflehi_epi16::<IMM8>(a);
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, r.as_i16x32(), zero))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflehi_epi16&expand=5207)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
) -> __m256i {
    static_assert_imm8!(IMM8);
    let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflehi_epi16&expand=5208)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
    static_assert_imm8!(IMM8);
    let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflehi_epi16&expand=5204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    static_assert_imm8!(IMM8);
    let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
}

/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflehi_epi16&expand=5205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero))
}

/// Shuffle packed 8-bit integers in a within 128-bit lanes according to the shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_epi8&expand=5159)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpshufb))]
pub unsafe fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpshufb(a.as_i8x64(), b.as_i8x64()))
}

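// Illustrative sketch (not part of the upstream source): like the SSSE3/AVX2
// forms, vpshufb operates per 128-bit lane. Each control byte in `b` selects
// a byte from the same lane of `a` using its low four bits; if the control
// byte's high bit is set, the output byte is forced to zero.
//
//     let a = _mm512_set1_epi8(7);
//     let ctl = _mm512_set1_epi8(-128); // 0x80: high bit set in every control byte
//     // All control bytes have the high bit set, so the result is all zeros.
//     let r = _mm512_shuffle_epi8(a, ctl);
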
7424 /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7425 ///
7426 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_epi8&expand=5157)
7427 #[inline]
7428 #[target_feature(enable = "avx512bw")]
7429 #[cfg_attr(test, assert_instr(vpshufb))]
_mm512_mask_shuffle_epi8( src: __m512i, k: __mmask64, a: __m512i, b: __m512i, ) -> __m512i7430 pub unsafe fn _mm512_mask_shuffle_epi8(
7431     src: __m512i,
7432     k: __mmask64,
7433     a: __m512i,
7434     b: __m512i,
7435 ) -> __m512i {
7436     let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
7437     transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
7438 }
7439 
7440 /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7441 ///
7442 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_epi8&expand=5158)
7443 #[inline]
7444 #[target_feature(enable = "avx512bw")]
7445 #[cfg_attr(test, assert_instr(vpshufb))]
_mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i7446 pub unsafe fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
7447     let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
7448     let zero = _mm512_setzero_si512().as_i8x64();
7449     transmute(simd_select_bitmask(k, shuffle, zero))
7450 }
7451 
7452 /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7453 ///
7454 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_epi8&expand=5154)
7455 #[inline]
7456 #[target_feature(enable = "avx512bw,avx512vl")]
7457 #[cfg_attr(test, assert_instr(vpshufb))]
_mm256_mask_shuffle_epi8( src: __m256i, k: __mmask32, a: __m256i, b: __m256i, ) -> __m256i7458 pub unsafe fn _mm256_mask_shuffle_epi8(
7459     src: __m256i,
7460     k: __mmask32,
7461     a: __m256i,
7462     b: __m256i,
7463 ) -> __m256i {
7464     let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
7465     transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
7466 }
7467 
7468 /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7469 ///
7470 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_epi8&expand=5155)
7471 #[inline]
7472 #[target_feature(enable = "avx512bw,avx512vl")]
7473 #[cfg_attr(test, assert_instr(vpshufb))]
_mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i7474 pub unsafe fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7475     let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
7476     let zero = _mm256_setzero_si256().as_i8x32();
7477     transmute(simd_select_bitmask(k, shuffle, zero))
7478 }
7479 
7480 /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7481 ///
7482 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shuffle_epi8&expand=5151)
7483 #[inline]
7484 #[target_feature(enable = "avx512bw,avx512vl")]
7485 #[cfg_attr(test, assert_instr(vpshufb))]
_mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i7486 pub unsafe fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7487     let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
7488     transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
7489 }
7490 
7491 /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7492 ///
7493 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shuffle_epi8&expand=5152)
7494 #[inline]
7495 #[target_feature(enable = "avx512bw,avx512vl")]
7496 #[cfg_attr(test, assert_instr(vpshufb))]
_mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i7497 pub unsafe fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7498     let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
7499     let zero = _mm_setzero_si128().as_i8x16();
7500     transmute(simd_select_bitmask(k, shuffle, zero))
7501 }
7502 
7503 /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
7504 ///
7505 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_test_epi16_mask&expand=5884)
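///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// CPU with `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(0b01);
/// let b = _mm512_set1_epi16(0b11);
/// // 0b01 & 0b11 is non-zero, so every bit of the mask is set.
/// let k = unsafe { _mm512_test_epi16_mask(a, b) };
/// assert_eq!(k, u32::MAX);
/// ```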
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_test_epi16_mask&expand=5883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_test_epi16_mask&expand=5882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_test_epi16_mask&expand=5881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_epi16_mask&expand=5880)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi16_mask(and, zero)
}

/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_test_epi16_mask&expand=5879)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi16_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_test_epi8_mask&expand=5902)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_test_epi8_mask&expand=5901)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_test_epi8_mask&expand=5900)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_test_epi8_mask&expand=5899)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_epi8_mask&expand=5898)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi8_mask(and, zero)
}

/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_test_epi8_mask&expand=5897)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmb))]
pub unsafe fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_testn_epi16_mask&expand=5915)
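///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// CPU with `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(0b01);
/// let b = _mm512_set1_epi16(0b10);
/// // 0b01 & 0b10 == 0, so every bit of the mask is set.
/// let k = unsafe { _mm512_testn_epi16_mask(a, b) };
/// assert_eq!(k, u32::MAX);
/// ```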
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_testn_epi16_mask&expand=5914)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testn_epi16_mask&expand=5913)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_testn_epi16_mask&expand=5912)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testn_epi16_mask&expand=5911)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi16_mask(and, zero)
}

/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_testn_epi16_mask&expand=5910)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmw))]
pub unsafe fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi16_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_testn_epi8_mask&expand=5933)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub unsafe fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_testn_epi8_mask&expand=5932)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub unsafe fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    let and = _mm512_and_si512(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testn_epi8_mask&expand=5931)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub unsafe fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_testn_epi8_mask&expand=5930)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub unsafe fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testn_epi8_mask&expand=5929)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub unsafe fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi8_mask(and, zero)
}

/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_testn_epi8_mask&expand=5928)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmb))]
pub unsafe fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi8_mask(k, and, zero)
}

/// Store 64-bit mask from a into memory.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_store_mask64&expand=5578)
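///
/// A minimal illustrative round trip (not part of the original source),
/// assuming a CPU with `avx512bw`:
///
/// ```ignore
/// let mut mem = 0u64;
/// unsafe { _store_mask64(&mut mem, 0b1010) };
/// assert_eq!(mem, 0b1010);
/// ```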
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) {
    ptr::write(mem_addr as *mut __mmask64, a);
}

/// Store 32-bit mask from a into memory.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_store_mask32&expand=5577)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) {
    ptr::write(mem_addr as *mut __mmask32, a);
}

/// Load 64-bit mask from memory into k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_load_mask64&expand=3318)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovq
pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 {
    ptr::read(mem_addr as *const __mmask64)
}

/// Load 32-bit mask from memory into k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_load_mask32&expand=3317)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(mov))] // should be kmovd
pub unsafe fn _load_mask32(mem_addr: *const u32) -> __mmask32 {
    ptr::read(mem_addr as *const __mmask32)
}

/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sad_epu8&expand=4855)
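///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// CPU with `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi8(2);
/// let b = _mm512_set1_epi8(5);
/// // |2 - 5| = 3, summed over each group of 8 bytes = 24.
/// let r = unsafe { _mm512_sad_epu8(a, b) };
/// // every 64-bit element of `r` holds 24 in its low 16 bits
/// ```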
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsadbw))]
pub unsafe fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsadbw(a.as_u8x64(), b.as_u8x64()))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_dbsad_epu8&expand=2114)
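///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// CPU with `avx512bw`; with identical inputs every absolute difference is
/// zero regardless of the quadruplet selection in `IMM8`:
///
/// ```ignore
/// let a = _mm512_set1_epi8(1);
/// let b = _mm512_set1_epi8(1);
/// let r = unsafe { _mm512_dbsad_epu8::<0>(a, b) };
/// // every 16-bit element of `r` is 0
/// ```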
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let r = vdbpsadbw(a, b, IMM8);
    transmute(r)
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_dbsad_epu8&expand=2115)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_imm8!(IMM8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let r = vdbpsadbw(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_u16x32()))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_dbsad_epu8&expand=2116)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_imm8!(IMM8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    let r = vdbpsadbw(a, b, IMM8);
    transmute(simd_select_bitmask(
        k,
        r,
        _mm512_setzero_si512().as_u16x32(),
    ))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dbsad_epu8&expand=2111)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_imm8!(IMM8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let r = vdbpsadbw256(a, b, IMM8);
    transmute(r)
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_dbsad_epu8&expand=2112)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_imm8!(IMM8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let r = vdbpsadbw256(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_u16x16()))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_dbsad_epu8&expand=2113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_imm8!(IMM8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    let r = vdbpsadbw256(a, b, IMM8);
    transmute(simd_select_bitmask(
        k,
        r,
        _mm256_setzero_si256().as_u16x16(),
    ))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dbsad_epu8&expand=2108)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let r = vdbpsadbw128(a, b, IMM8);
    transmute(r)
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_dbsad_epu8&expand=2109)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm_mask_dbsad_epu8<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let r = vdbpsadbw128(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, src.as_u16x8()))
}

/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_dbsad_epu8&expand=2110)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm_maskz_dbsad_epu8<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    let r = vdbpsadbw128(a, b, IMM8);
    transmute(simd_select_bitmask(k, r, _mm_setzero_si128().as_u16x8()))
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi16_mask&expand=3873)
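///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// CPU with `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(-1); // sign bit set in every element
/// let k = unsafe { _mm512_movepi16_mask(a) };
/// assert_eq!(k, u32::MAX);
/// ```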
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
    let filter = _mm512_set1_epi16(1 << 15);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi16_mask&expand=3872)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
    let filter = _mm256_set1_epi16(1 << 15);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi16_mask&expand=3871)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovw2m))]
pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
    let filter = _mm_set1_epi16(1 << 15);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi16_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi8_mask&expand=3883)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovb2m))]
pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
    let filter = _mm512_set1_epi8(1 << 7);
    let a = _mm512_and_si512(a, filter);
    _mm512_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi8_mask&expand=3882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
                                           // using vpmovb2m plus converting the mask register to a standard register.
pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
    let filter = _mm256_set1_epi8(1 << 7);
    let a = _mm256_and_si256(a, filter);
    _mm256_cmpeq_epi8_mask(a, filter)
}

/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi8_mask&expand=3881)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
                                           // using vpmovb2m plus converting the mask register to a standard register.
pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
    let filter = _mm_set1_epi8(1 << 7);
    let a = _mm_and_si128(a, filter);
    _mm_cmpeq_epi8_mask(a, filter)
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movm_epi16&expand=3886)
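///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// CPU with `avx512bw`:
///
/// ```ignore
/// let k: __mmask32 = 0b1; // select only element 0
/// let r = unsafe { _mm512_movm_epi16(k) };
/// // element 0 of `r` is -1 (all ones); every other element is 0
/// ```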
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub unsafe fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
    let one = _mm512_set1_epi16(
        1 << 15
            | 1 << 14
            | 1 << 13
            | 1 << 12
            | 1 << 11
            | 1 << 10
            | 1 << 9
            | 1 << 8
            | 1 << 7
            | 1 << 6
            | 1 << 5
            | 1 << 4
            | 1 << 3
            | 1 << 2
            | 1 << 1
            | 1 << 0,
    )
    .as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, one, zero))
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movm_epi16&expand=3885)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub unsafe fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
    let one = _mm256_set1_epi16(
        1 << 15
            | 1 << 14
            | 1 << 13
            | 1 << 12
            | 1 << 11
            | 1 << 10
            | 1 << 9
            | 1 << 8
            | 1 << 7
            | 1 << 6
            | 1 << 5
            | 1 << 4
            | 1 << 3
            | 1 << 2
            | 1 << 1
            | 1 << 0,
    )
    .as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, one, zero))
}

/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movm_epi16&expand=3884)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovm2w))]
pub unsafe fn _mm_movm_epi16(k: __mmask8) -> __m128i {
    let one = _mm_set1_epi16(
        1 << 15
            | 1 << 14
            | 1 << 13
            | 1 << 12
            | 1 << 11
            | 1 << 10
            | 1 << 9
            | 1 << 8
            | 1 << 7
            | 1 << 6
            | 1 << 5
            | 1 << 4
            | 1 << 3
            | 1 << 2
            | 1 << 1
            | 1 << 0,
    )
    .as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, one, zero))
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movm_epi8&expand=3895)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub unsafe fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
    let one =
        _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
            .as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, one, zero))
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movm_epi8&expand=3894)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub unsafe fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
    let one =
        _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
            .as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, one, zero))
}

/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movm_epi8&expand=3893)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovm2b))]
pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i {
    let one = _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
        .as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, one, zero))
}

/// Add 32-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask32&expand=3207)
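///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// CPU with `avx512bw`:
///
/// ```ignore
/// let a: __mmask32 = 3;
/// let b: __mmask32 = 4;
/// assert_eq!(unsafe { _kadd_mask32(a, b) }, 7);
/// ```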
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddd
                                                                  // llvm.x86.avx512.kadd.d
pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    transmute(a + b)
}

/// Add 64-bit masks in a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask64&expand=3208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(add))]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(lea))] // generate normal lea/add code instead of kaddq
                                                                  // llvm.x86.avx512.kadd.q
pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    transmute(a + b)
}

/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask32&expand=3213)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandd
pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    transmute(a & b)
}

/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask64&expand=3214)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandq
pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    transmute(a & b)
}

/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_knot_mask32&expand=3234)
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 {
    transmute(a ^ 0b11111111_11111111_11111111_11111111)
}

/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_knot_mask64&expand=3235)
#[inline]
#[target_feature(enable = "avx512bw")]
pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
    transmute(a ^ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111)
}

/// Compute the bitwise NOT of 32-bit mask a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask32&expand=3219)
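///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// CPU with `avx512bw`:
///
/// ```ignore
/// let a: __mmask32 = 0b1100;
/// let b: __mmask32 = 0b1010;
/// // (!a) & b keeps only the bits of b that are clear in a.
/// assert_eq!(unsafe { _kandn_mask32(a, b) }, 0b0010);
/// ```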
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(not))] // generate normal not/and code instead of kandnd
pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    transmute(_knot_mask32(a) & b)
}

/// Compute the bitwise NOT of 64-bit mask a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask64&expand=3220)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(not))] // generate normal not/and code instead of kandnq
pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    transmute(_knot_mask64(a) & b)
}

/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask32&expand=3240)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of kord
pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    transmute(a | b)
}

/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask64&expand=3241)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korq
pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    transmute(a | b)
}

/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask32&expand=3292)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxord
pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    transmute(a ^ b)
}

/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask64&expand=3293)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorq
pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    transmute(a ^ b)
}

/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask32&expand=3286)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor/not code instead of kxnord
pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
    transmute(_knot_mask32(a ^ b))
}

/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask64&expand=3287)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor/not code instead of kxnorq
pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
    transmute(_knot_mask64(a ^ b))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_epi8&expand=1407)
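///
/// A minimal illustrative sketch (not part of the original source), assuming a
/// CPU with `avx512bw`:
///
/// ```ignore
/// let a = _mm512_set1_epi16(0x0102);
/// // truncation keeps only the low 8 bits of each 16-bit element
/// let r = unsafe { _mm512_cvtepi16_epi8(a) };
/// // every byte of `r` is 0x02
/// ```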
8361 #[inline]
8362 #[target_feature(enable = "avx512bw")]
8363 #[cfg_attr(test, assert_instr(vpmovwb))]
_mm512_cvtepi16_epi8(a: __m512i) -> __m256i8364 pub unsafe fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
8365     let a = a.as_i16x32();
8366     transmute::<i8x32, _>(simd_cast(a))
8367 }
8368 
8369 /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8370 ///
8371 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_epi8&expand=1408)
8372 #[inline]
8373 #[target_feature(enable = "avx512bw")]
8374 #[cfg_attr(test, assert_instr(vpmovwb))]
_mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i8375 pub unsafe fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
8376     let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
8377     transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
8378 }

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(
        k,
        convert,
        _mm256_setzero_si256().as_i8x32(),
    ))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi8&expand=1404)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
    let a = a.as_i16x16();
    transmute::<i8x16, _>(simd_cast(a))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_epi8&expand=1405)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(
        k,
        convert,
        _mm_setzero_si128().as_i8x16(),
    ))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi8&expand=1401)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    let v256: i16x16 = simd_shuffle16!(a, zero, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]);
    transmute::<i8x16, _>(simd_cast(v256))
}
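// The shuffle above pads the eight input words with zeros so that `simd_cast`
// can produce a full 16-byte vector; the upper eight bytes of the result are
// therefore zero, matching the `dst[MAX:64] := 0` step in Intel's pseudocode.
// A small worked example (hypothetical values):
//
//     let a = _mm_set1_epi16(0x01FF);
//     let r = _mm_cvtepi16_epi8(a);
//     // low 8 bytes are 0xFF (0x01FF truncated), high 8 bytes are 0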

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_epi8&expand=1402)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepi16_epi8(a).as_i8x16();
    // Widen the mask and force the upper eight select bits so that the upper
    // lanes take the zeroed upper bytes of `convert`: Intel's pseudocode
    // zeroes dst[127:64] regardless of the mask. (The previous
    // `0b11111111_11111111 & k` was a no-op that let `src` leak through.)
    let k: __mmask16 = 0b11111111_00000000 | k as __mmask16;
    transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
}

/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_epi8&expand=1403)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovwb))]
pub unsafe fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepi16_epi8(a).as_i8x16();
    // Widen the mask; the clear upper bits make the upper lanes select the
    // zero vector, as the instruction requires. (The previous AND with
    // `0b11111111_11111111` was a no-op.)
    let k: __mmask16 = k as __mmask16;
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi16_epi8&expand=1807)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
    transmute(vpmovswb(
        a.as_i16x32(),
        _mm256_setzero_si256().as_i8x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
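// Unlike the truncating form above, this narrows with signed saturation, e.g.
// 300i16 becomes 127i8 and -300i16 becomes -128i8. Passing an all-ones mask
// and a zero source to the masked LLVM intrinsic makes it behave as the
// unmasked instruction. Sketch (hypothetical values):
//
//     let a = _mm512_set1_epi16(300);
//     let r = _mm512_cvtsepi16_epi8(a); // every byte is 127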

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    transmute(vpmovswb(
        a.as_i16x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi16_epi8&expand=1804)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
    transmute(vpmovswb256(
        a.as_i16x16(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111_11111111,
    ))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    transmute(vpmovswb256(
        a.as_i16x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi16_epi8&expand=1801)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
    transmute(vpmovswb128(
        a.as_i16x8(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi16_epi8&expand=1802)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovswb128(a.as_i16x8(), _mm_setzero_si128().as_i8x16(), k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi16_epi8&expand=2042)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
    transmute(vpmovuswb(
        a.as_u16x32(),
        _mm256_setzero_si256().as_u8x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
    transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
    transmute(vpmovuswb(
        a.as_u16x32(),
        _mm256_setzero_si256().as_u8x32(),
        k,
    ))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi16_epi8&expand=2039)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
    transmute(vpmovuswb256(
        a.as_u16x16(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111_11111111,
    ))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
    transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
    transmute(vpmovuswb256(
        a.as_u16x16(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi16_epi8&expand=2036)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
    transmute(vpmovuswb128(
        a.as_u16x8(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111,
    ))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi16_epi8&expand=2037)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k))
}

/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovuswb))]
pub unsafe fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
    transmute(vpmovuswb128(
        a.as_u16x8(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}
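// The unsigned family saturates against the u8 range instead, e.g. 256u16
// becomes 255u8 while values already in 0..=255 pass through unchanged.
// Sketch (hypothetical values):
//
//     let a = _mm_set1_epi16(0x0100); // 256 in every word
//     let r = _mm_cvtusepi16_epi8(a); // low 8 bytes are 0xFF, high 8 are 0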

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi8_epi16&expand=1526)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
    let a = a.as_i8x32();
    transmute::<i16x32, _>(simd_cast(a))
}
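// `simd_cast` from i8x32 to i16x32 sign-extends each lane, so for example
// -1i8 (0xFF) widens to -1i16 (0xFFFF). Sketch (hypothetical values):
//
//     let a = _mm256_set1_epi8(-1);
//     let r = _mm512_cvtepi8_epi16(a); // every word is -1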

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi8_epi16&expand=1527)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(
        k,
        convert,
        _mm512_setzero_si512().as_i16x32(),
    ))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi8_epi16&expand=1524)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(
        k,
        convert,
        _mm256_setzero_si256().as_i16x16(),
    ))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi8_epi16&expand=1521)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepi8_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}

/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi8_epi16&expand=1522)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
pub unsafe fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepi8_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(
        k,
        convert,
        _mm_setzero_si128().as_i16x8(),
    ))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu8_epi16&expand=1612)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
    let a = a.as_u8x32();
    transmute::<i16x32, _>(simd_cast(a))
}
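// Here the source lanes are reinterpreted as u8 first, so the cast
// zero-extends: 0xFFu8 widens to 0x00FF (255), not -1. Sketch (hypothetical
// values):
//
//     let a = _mm256_set1_epi8(-1); // 0xFF bytes
//     let r = _mm512_cvtepu8_epi16(a); // every word is 255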

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu8_epi16&expand=1613)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(
        k,
        convert,
        _mm512_setzero_si512().as_i16x32(),
    ))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu8_epi16&expand=1610)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
    let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
    let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(
        k,
        convert,
        _mm256_setzero_si256().as_i16x16(),
    ))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu8_epi16&expand=1607)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepu8_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi16&expand=1608)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let convert = _mm_cvtepu8_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(
        k,
        convert,
        _mm_setzero_si128().as_i16x8(),
    ))
}

/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 || i % 16 < shift {
            0
        } else {
            64 + (i - shift)
        }
    }
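    // Worked example for the index function above: with IMM8 = 3, bytes 0..=2
    // of each 16-byte lane get index 0 (a zero byte from the first shuffle
    // operand), and byte 3 gets 64 + 0, i.e. a[0] from the second operand, so
    // every lane is shifted left by three bytes with zeros shifted in.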
    let a = a.as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    let r: i8x64 = simd_shuffle64!(
        zero,
        a,
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
            mask(IMM8, 16),
            mask(IMM8, 17),
            mask(IMM8, 18),
            mask(IMM8, 19),
            mask(IMM8, 20),
            mask(IMM8, 21),
            mask(IMM8, 22),
            mask(IMM8, 23),
            mask(IMM8, 24),
            mask(IMM8, 25),
            mask(IMM8, 26),
            mask(IMM8, 27),
            mask(IMM8, 28),
            mask(IMM8, 29),
            mask(IMM8, 30),
            mask(IMM8, 31),
            mask(IMM8, 32),
            mask(IMM8, 33),
            mask(IMM8, 34),
            mask(IMM8, 35),
            mask(IMM8, 36),
            mask(IMM8, 37),
            mask(IMM8, 38),
            mask(IMM8, 39),
            mask(IMM8, 40),
            mask(IMM8, 41),
            mask(IMM8, 42),
            mask(IMM8, 43),
            mask(IMM8, 44),
            mask(IMM8, 45),
            mask(IMM8, 46),
            mask(IMM8, 47),
            mask(IMM8, 48),
            mask(IMM8, 49),
            mask(IMM8, 50),
            mask(IMM8, 51),
            mask(IMM8, 52),
            mask(IMM8, 53),
            mask(IMM8, 54),
            mask(IMM8, 55),
            mask(IMM8, 56),
            mask(IMM8, 57),
            mask(IMM8, 58),
            mask(IMM8, 59),
            mask(IMM8, 60),
            mask(IMM8, 61),
            mask(IMM8, 62),
            mask(IMM8, 63),
        ],
    );
    transmute(r)
}
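// Usage sketch (hypothetical values): a one-byte shift within each 128-bit
// lane.
//
//     let a = _mm512_set1_epi64(0x1122334455667788);
//     let r = _mm512_bslli_epi128::<1>(a);
//     // each lane's bytes move up one position and a zero byte enters at the
//     // bottom, so the low quadword of every lane becomes 0x2233445566778800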

/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bsrli_epi128&expand=594)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    let a = a.as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    // Only shifts in 0..=15 move bytes within a lane; any larger count makes
    // `vpsrldq` produce all-zero lanes, which the `_` arm below handles.
    // (Matching on `IMM8 % 16` here would wrongly map e.g. IMM8 == 16 back to
    // the identity arm.)
    let r: i8x64 = match IMM8 {
        0 => simd_shuffle64!(
            a,
            zero,
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
                44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
            ],
        ),
        1 => simd_shuffle64!(
            a,
            zero,
            [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23,
                24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
                45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112,
            ],
        ),
        2 => simd_shuffle64!(
            a,
            zero,
            [
                2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
                46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
            ],
        ),
        3 => simd_shuffle64!(
            a,
            zero,
            [
                3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
                46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
                114,
            ],
        ),
        4 => simd_shuffle64!(
            a,
            zero,
            [
                4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25,
                26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
                47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
                115,
            ],
        ),
        5 => simd_shuffle64!(
            a,
            zero,
            [
                5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26,
                27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
                115, 116,
            ],
        ),
        6 => simd_shuffle64!(
            a,
            zero,
            [
                6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
                97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117,
            ],
        ),
        7 => simd_shuffle64!(
            a,
            zero,
            [
                7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
                97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117, 118,
            ],
        ),
        8 => simd_shuffle64!(
            a,
            zero,
            [
                8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28,
                29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97,
                98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117, 118, 119,
            ],
        ),
        9 => simd_shuffle64!(
            a,
            zero,
            [
                9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29,
                30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98,
                99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
                117, 118, 119, 120,
            ],
        ),
        10 => simd_shuffle64!(
            a,
            zero,
            [
                10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30,
                31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99,
                100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
                118, 119, 120, 121,
            ],
        ),
        11 => simd_shuffle64!(
            a,
            zero,
            [
                11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31,
                80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99,
                100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
                117, 118, 119, 120, 121, 122,
            ],
        ),
        12 => simd_shuffle64!(
            a,
            zero,
            [
                12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80,
                81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100,
                101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
                118, 119, 120, 121, 122, 123,
            ],
        ),
        13 => simd_shuffle64!(
            a,
            zero,
            [
                13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81,
                82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101,
                102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118,
                119, 120, 121, 122, 123, 124,
            ],
        ),
        14 => simd_shuffle64!(
            a,
            zero,
            [
                14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82,
                83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102,
                103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119,
                120, 121, 122, 123, 124, 125,
            ],
        ),
        15 => simd_shuffle64!(
            a,
            zero,
            [
                15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83,
                84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103,
                104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120,
                121, 122, 123, 124, 125, 126,
            ],
        ),
        _ => zero,
    };
    transmute(r)
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_alignr_epi8&expand=263)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    // If palignr shifts the concatenated pair by 32 bytes or more, every byte
    // of the 128-bit result lanes is shifted out, so emit zero. (Testing
    // `IMM8 > 32` here would let IMM8 == 32 fall through to the identity arm
    // below.)
    if IMM8 >= 32 {
        return _mm512_set1_epi8(0);
    }
    // If palignr shifts the pair by one lane (16 bytes) or more, but less
    // than two lanes, convert to shifting zeroes in from the top; the match
    // below works on IMM8 % 16, so IMM8 == 16 must also take this path.
    let (a, b) = if IMM8 >= 16 {
        (_mm512_set1_epi8(0), a)
    } else {
        (a, b)
    };
    let a = a.as_i8x64();
    let b = b.as_i8x64();

    let r: i8x64 = match IMM8 % 16 {
        0 => simd_shuffle64!(
            b,
            a,
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
                44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
            ],
        ),
        1 => simd_shuffle64!(
            b,
            a,
            [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23,
                24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
                45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112,
            ],
        ),
        2 => simd_shuffle64!(
            b,
            a,
            [
                2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
                46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
            ],
        ),
        3 => simd_shuffle64!(
            b,
            a,
            [
                3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
                46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
                114,
            ],
        ),
        4 => simd_shuffle64!(
            b,
            a,
            [
                4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25,
                26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
                47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
                115,
            ],
        ),
        5 => simd_shuffle64!(
            b,
            a,
            [
                5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26,
                27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
                115, 116,
            ],
        ),
        6 => simd_shuffle64!(
            b,
            a,
            [
                6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
                97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117,
            ],
        ),
        7 => simd_shuffle64!(
            b,
            a,
            [
                7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
                97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117, 118,
            ],
        ),
        8 => simd_shuffle64!(
            b,
            a,
            [
                8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28,
                29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97,
                98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
                116, 117, 118, 119,
            ],
        ),
        9 => simd_shuffle64!(
            b,
            a,
            [
                9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29,
                30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98,
                99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
                117, 118, 119, 120,
            ],
        ),
        10 => simd_shuffle64!(
            b,
            a,
            [
                10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30,
                31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99,
                100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
                118, 119, 120, 121,
            ],
        ),
        11 => simd_shuffle64!(
            b,
            a,
            [
                11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31,
                80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99,
                100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
                117, 118, 119, 120, 121, 122,
            ],
        ),
        12 => simd_shuffle64!(
            b,
            a,
            [
                12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80,
                81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100,
                101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
                118, 119, 120, 121, 122, 123,
            ],
        ),
        13 => simd_shuffle64!(
            b,
            a,
            [
                13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81,
                82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101,
                102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118,
                119, 120, 121, 122, 123, 124,
            ],
        ),
        14 => simd_shuffle64!(
            b,
            a,
            [
                14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82,
                83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102,
                103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119,
                120, 121, 122, 123, 124, 125,
            ],
        ),
        15 => simd_shuffle64!(
            b,
            a,
            [
                15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83,
                84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103,
                104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120,
                121, 122, 123, 124, 125, 126,
            ],
        ),
        _ => b,
    };
    transmute(r)
}
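// Usage sketch (hypothetical values): with IMM8 = 1 each 16-byte block of the
// result takes bytes 1..=15 of the corresponding block of `b` followed by
// byte 0 of the matching block of `a`.
//
//     let b = _mm512_set1_epi8(2);
//     let a = _mm512_set1_epi8(9);
//     let r = _mm512_alignr_epi8::<1>(a, b);
//     // each 16-byte block: fifteen 2s, then a 9 in the top byte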

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_alignr_epi8&expand=264)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_alignr_epi8<const IMM8: i32>(
    src: __m512i,
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_imm8!(IMM8);
    let r = _mm512_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_alignr_epi8&expand=265)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_alignr_epi8<const IMM8: i32>(
    k: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    static_assert_imm8!(IMM8);
    let r = _mm512_alignr_epi8::<IMM8>(a, b);
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, r.as_i8x64(), zero))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_alignr_epi8&expand=261)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub unsafe fn _mm256_mask_alignr_epi8<const IMM8: i32>(
    src: __m256i,
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_imm8!(IMM8);
    let r = _mm256_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_alignr_epi8&expand=262)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub unsafe fn _mm256_maskz_alignr_epi8<const IMM8: i32>(
    k: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    static_assert_imm8!(IMM8);
    let r = _mm256_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(
        k,
        r.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
    ))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_alignr_epi8&expand=258)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub unsafe fn _mm_mask_alignr_epi8<const IMM8: i32>(
    src: __m128i,
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_imm8!(IMM8);
    let r = _mm_alignr_epi8::<IMM8>(a, b);
    transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
}

/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_alignr_epi8&expand=259)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
pub unsafe fn _mm_maskz_alignr_epi8<const IMM8: i32>(
    k: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    static_assert_imm8!(IMM8);
    let r = _mm_alignr_epi8::<IMM8>(a, b);
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, r.as_i8x16(), zero))
}

/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpmovswb))]
pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
    vpmovswbmem(mem_addr as *mut i8, a.as_i16x32(), k);
}
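// Sketch for the masked stores (hypothetical buffer, given some `__m512i`
// value `a`): only the bytes whose mask bit is set are written, so the rest
// of the destination memory is left untouched.
//
//     let mut buf = [0i8; 32];
//     _mm512_mask_cvtsepi16_storeu_epi8(buf.as_mut_ptr(), 0b1111, a);
//     // only buf[0..4] receive the saturated low words of `a`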
9444 
9445 /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9446 ///
9447 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
9448 #[inline]
9449 #[target_feature(enable = "avx512bw,avx512vl")]
9450 #[cfg_attr(test, assert_instr(vpmovswb))]
_mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i)9451 pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
9452     vpmovswbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
9453 }
9454 
9455 /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9456 ///
9457 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
9458 #[inline]
9459 #[target_feature(enable = "avx512bw,avx512vl")]
9460 #[cfg_attr(test, assert_instr(vpmovswb))]
_mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i)9461 pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
9462     vpmovswbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
9463 }
9464 
9465 /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9466 ///
9467 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
9468 #[inline]
9469 #[target_feature(enable = "avx512bw")]
9470 #[cfg_attr(test, assert_instr(vpmovwb))]
_mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i)9471 pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
9472     vpmovwbmem(mem_addr as *mut i8, a.as_i16x32(), k);
9473 }
9474 
9475 /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9476 ///
9477 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
9478 #[inline]
9479 #[target_feature(enable = "avx512bw,avx512vl")]
9480 #[cfg_attr(test, assert_instr(vpmovwb))]
_mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i)9481 pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
9482     vpmovwbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
9483 }
9484 
9485 /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9486 ///
9487 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
9488 #[inline]
9489 #[target_feature(enable = "avx512bw,avx512vl")]
9490 #[cfg_attr(test, assert_instr(vpmovwb))]
_mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i)9491 pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
9492     vpmovwbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
9493 }
9494 
9495 /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9496 ///
9497 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
9498 #[inline]
9499 #[target_feature(enable = "avx512bw")]
9500 #[cfg_attr(test, assert_instr(vpmovuswb))]
_mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i)9501 pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
9502     vpmovuswbmem(mem_addr as *mut i8, a.as_i16x32(), k);
9503 }
9504 
9505 /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9506 ///
9507 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
9508 #[inline]
9509 #[target_feature(enable = "avx512bw,avx512vl")]
9510 #[cfg_attr(test, assert_instr(vpmovuswb))]
_mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i)9511 pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
9512     vpmovuswbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
9513 }
9514 
9515 /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9516 ///
9517 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
9518 #[inline]
9519 #[target_feature(enable = "avx512bw,avx512vl")]
9520 #[cfg_attr(test, assert_instr(vpmovuswb))]
_mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i)9521 pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
9522     vpmovuswbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
9523 }
9524 
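// A minimal usage sketch for the masked narrowing stores above (illustrative
// only; it assumes an AVX512BW-capable CPU and is not itself part of this
// module). Only the bytes whose mask bit is set are written; the rest of the
// destination buffer is left untouched:
//
//     let a = _mm512_set1_epi16(300); // 300 saturates to i8::MAX (127)
//     let mut out = [0i8; 32];
//     // Store only the low eight lanes; out[8..] keeps its old contents.
//     _mm512_mask_cvtsepi16_storeu_epi8(out.as_mut_ptr(), 0b1111_1111, a);
//     assert!(out[..8].iter().all(|&v| v == i8::MAX));
//     assert!(out[8..].iter().all(|&v| v == 0));
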
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.mask.paddus.w.512"]
    fn vpaddusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
    #[link_name = "llvm.x86.avx512.mask.paddus.w.256"]
    fn vpaddusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.paddus.w.128"]
    fn vpaddusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.paddus.b.512"]
    fn vpaddusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
    #[link_name = "llvm.x86.avx512.mask.paddus.b.256"]
    fn vpaddusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.paddus.b.128"]
    fn vpaddusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.padds.w.512"]
    fn vpaddsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.padds.w.256"]
    fn vpaddsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.padds.w.128"]
    fn vpaddsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.padds.b.512"]
    fn vpaddsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.padds.b.256"]
    fn vpaddsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.padds.b.128"]
    fn vpaddsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.psubus.w.512"]
    fn vpsubusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
    #[link_name = "llvm.x86.avx512.mask.psubus.w.256"]
    fn vpsubusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.psubus.w.128"]
    fn vpsubusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.psubus.b.512"]
    fn vpsubusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
    #[link_name = "llvm.x86.avx512.mask.psubus.b.256"]
    fn vpsubusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.psubus.b.128"]
    fn vpsubusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.psubs.w.512"]
    fn vpsubsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.psubs.w.256"]
    fn vpsubsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.psubs.w.128"]
    fn vpsubsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.psubs.b.512"]
    fn vpsubsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.psubs.b.256"]
    fn vpsubsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.psubs.b.128"]
    fn vpsubsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16;

    #[link_name = "llvm.x86.avx512.pmulhu.w.512"]
    fn vpmulhuw(a: u16x32, b: u16x32) -> u16x32;
    #[link_name = "llvm.x86.avx512.pmulh.w.512"]
    fn vpmulhw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    #[link_name = "llvm.x86.avx512.mask.ucmp.w.512"]
    fn vpcmpuw(a: u16x32, b: u16x32, op: i32, mask: u32) -> u32;
    #[link_name = "llvm.x86.avx512.mask.ucmp.w.256"]
    fn vpcmpuw256(a: u16x16, b: u16x16, op: i32, mask: u16) -> u16;
    #[link_name = "llvm.x86.avx512.mask.ucmp.w.128"]
    fn vpcmpuw128(a: u16x8, b: u16x8, op: i32, mask: u8) -> u8;

    #[link_name = "llvm.x86.avx512.mask.ucmp.b.512"]
    fn vpcmpub(a: u8x64, b: u8x64, op: i32, mask: u64) -> u64;
    #[link_name = "llvm.x86.avx512.mask.ucmp.b.256"]
    fn vpcmpub256(a: u8x32, b: u8x32, op: i32, mask: u32) -> u32;
    #[link_name = "llvm.x86.avx512.mask.ucmp.b.128"]
    fn vpcmpub128(a: u8x16, b: u8x16, op: i32, mask: u16) -> u16;

    #[link_name = "llvm.x86.avx512.mask.cmp.w.512"]
    fn vpcmpw(a: i16x32, b: i16x32, op: i32, mask: u32) -> u32;
    #[link_name = "llvm.x86.avx512.mask.cmp.w.256"]
    fn vpcmpw256(a: i16x16, b: i16x16, op: i32, mask: u16) -> u16;
    #[link_name = "llvm.x86.avx512.mask.cmp.w.128"]
    fn vpcmpw128(a: i16x8, b: i16x8, op: i32, mask: u8) -> u8;

    #[link_name = "llvm.x86.avx512.mask.cmp.b.512"]
    fn vpcmpb(a: i8x64, b: i8x64, op: i32, mask: u64) -> u64;
    #[link_name = "llvm.x86.avx512.mask.cmp.b.256"]
    fn vpcmpb256(a: i8x32, b: i8x32, op: i32, mask: u32) -> u32;
    #[link_name = "llvm.x86.avx512.mask.cmp.b.128"]
    fn vpcmpb128(a: i8x16, b: i8x16, op: i32, mask: u16) -> u16;

    #[link_name = "llvm.x86.avx512.mask.pmaxu.w.512"]
    fn vpmaxuw(a: u16x32, b: u16x32) -> u16x32;
    #[link_name = "llvm.x86.avx512.mask.pmaxu.b.512"]
    fn vpmaxub(a: u8x64, b: u8x64) -> u8x64;
    #[link_name = "llvm.x86.avx512.mask.pmaxs.w.512"]
    fn vpmaxsw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.pmaxs.b.512"]
    fn vpmaxsb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.mask.pminu.w.512"]
    fn vpminuw(a: u16x32, b: u16x32) -> u16x32;
    #[link_name = "llvm.x86.avx512.mask.pminu.b.512"]
    fn vpminub(a: u8x64, b: u8x64) -> u8x64;
    #[link_name = "llvm.x86.avx512.mask.pmins.w.512"]
    fn vpminsw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.pmins.b.512"]
    fn vpminsb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;

    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    #[link_name = "llvm.x86.avx512.pavg.w.512"]
    fn vpavgw(a: u16x32, b: u16x32) -> u16x32;
    #[link_name = "llvm.x86.avx512.pavg.b.512"]
    fn vpavgb(a: u8x64, b: u8x64) -> u8x64;

    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
    #[link_name = "llvm.x86.avx512.pslli.w.512"]
    fn vpslliw(a: i16x32, imm8: u32) -> i16x32;

    #[link_name = "llvm.x86.avx2.pslli.w"]
    fn pslliw256(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.sse2.pslli.w"]
    fn pslliw128(a: i16x8, imm8: i32) -> i16x8;

    #[link_name = "llvm.x86.avx512.psllv.w.512"]
    fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psllv.w.256"]
    fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psllv.w.128"]
    fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrli.w.512"]
    fn vpsrliw(a: i16x32, imm8: u32) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrlv.w.512"]
    fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrlv.w.256"]
    fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrlv.w.128"]
    fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrai.w.512"]
    fn vpsraiw(a: i16x32, imm8: u32) -> i16x32;

    #[link_name = "llvm.x86.avx2.psrai.w"]
    fn psraiw256(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.sse2.psrai.w"]
    fn psraiw128(a: i16x8, imm8: i32) -> i16x8;

    #[link_name = "llvm.x86.avx512.psrav.w.512"]
    fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrav.w.256"]
    fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrav.w.128"]
    fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
    fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
    fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
    fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
}

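// Scalar reference semantics for the narrowing conversions declared above
// (a sketch for readers; the implementation does not use these helpers):
// signed saturation clamps to the i8 range, unsigned saturation clamps to
// [0, u8::MAX], and plain truncation keeps only the low byte.
//
//     fn cvt_sat_i16_to_i8(x: i16) -> i8 {
//         x.clamp(i8::MIN as i16, i8::MAX as i16) as i8
//     }
//     fn cvt_usat_u16_to_u8(x: u16) -> u8 {
//         x.min(u8::MAX as u16) as u8
//     }
//     fn cvt_trunc_i16_to_i8(x: i16) -> i8 {
//         x as i8
//     }
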
#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem;

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_abs_epi16(a);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_mask_abs_epi16(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_maskz_abs_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_mask_abs_epi16(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_maskz_abs_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_mask_abs_epi16(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_maskz_abs_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_abs_epi8(a);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_mask_abs_epi8(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_maskz_abs_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_mask_abs_epi8(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_maskz_abs_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_mask_abs_epi8(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_maskz_abs_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

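    // Note on the mask constants used throughout these tests: bit 0 of a
    // writemask selects lane 0, and lane 0 is the *last* argument of the
    // `_mm*_set_epi16` / `_mm*_set_epi8` constructors. A mask such as
    // 0b00000000_11111111 therefore affects the rightmost eight values in the
    // expected vectors.
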
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_add_epi16(a, b);
        let e = _mm512_set1_epi16(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_add_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_add_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_add_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_add_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_add_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_add_epi8(a, b);
        let e = _mm512_set1_epi8(3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_add_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_add_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }

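    // The `adds_*` tests below exercise saturating addition; the scalar
    // equivalent (shown as a sketch) is Rust's `saturating_add`:
    //
    //     assert_eq!(u16::MAX.saturating_add(1), u16::MAX);
    //     assert_eq!(i16::MAX.saturating_add(1), i16::MAX);
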
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_adds_epu16(a, b);
        let e = _mm512_set1_epi16(u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_adds_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_adds_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_adds_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_adds_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_adds_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_adds_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu16(0b00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_adds_epu8(a, b);
        let e = _mm512_set1_epi8(u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_adds_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_adds_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_adds_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_adds_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_adds_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_adds_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_adds_epi16(a, b);
        let e = _mm512_set1_epi16(i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_adds_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_adds_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_adds_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_adds_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_adds_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_adds_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_adds_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_adds_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_adds_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_adds_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_adds_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_adds_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_adds_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_adds_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_adds_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_adds_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_adds_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_adds_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

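    // In contrast to the saturating variants above, the plain `sub_*` tests
    // below use wrapping (two's-complement) arithmetic, like the scalar
    // `wrapping_sub`; here 1 - 2 simply yields -1 in every lane.
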
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_sub_epi16(a, b);
        let e = _mm512_set1_epi16(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_sub_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sub_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_sub_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sub_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_sub_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_sub_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

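    // Wrapping subtraction on 8-bit lanes: same pattern as the epi16 tests
    // above, but with 64 lanes and a 64-bit mask.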
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_sub_epi8(a, b);
        let e = _mm512_set1_epi8(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_sub_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sub_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_sub_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_sub_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
                                1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sub_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_sub_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
                                0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_sub_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sub_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_maskz_sub_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

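    // Unsigned saturating subtraction clamps at 0 instead of wrapping, so each
    // lane below computes the equivalent of 1u16.saturating_sub(u16::MAX) == 0.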
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_subs_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_mask_subs_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(u16::MAX as i16);
        let r = _mm512_maskz_subs_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_mask_subs_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(u16::MAX as i16);
        let r = _mm256_maskz_subs_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_mask_subs_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(u16::MAX as i16);
        let r = _mm_maskz_subs_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

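    // Unsigned saturating subtraction on bytes: 1u8.saturating_sub(u8::MAX) == 0
    // in every selected lane.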
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_subs_epu8(a, b);
        let e = _mm512_set1_epi8(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_mask_subs_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(u8::MAX as i8);
        let r = _mm512_maskz_subs_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_mask_subs_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(u8::MAX as i8);
        let r = _mm256_maskz_subs_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_mask_subs_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(u8::MAX as i8);
        let r = _mm_maskz_subs_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

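    // Signed saturating subtraction: -1 - i16::MAX would be -32769, which
    // clamps to i16::MIN, matching (-1i16).saturating_sub(i16::MAX).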
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_subs_epi16(a, b);
        let e = _mm512_set1_epi16(i16::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_subs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_subs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_subs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_subs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_subs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_subs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
        assert_eq_m128i(r, e);
    }

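    // Signed saturating subtraction on bytes: -1 - i8::MAX clamps to i8::MIN,
    // matching (-1i8).saturating_sub(i8::MAX).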
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_subs_epi8(a, b);
        let e = _mm512_set1_epi8(i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_mask_subs_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_subs_epi8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_subs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(i8::MAX);
        let r = _mm512_maskz_subs_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_subs_epi8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_subs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_mask_subs_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_subs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(i8::MAX);
        let r = _mm256_maskz_subs_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_subs_epi8() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_mask_subs_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_subs_epi8() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(i8::MAX);
        let r = _mm_maskz_subs_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
        assert_eq_m128i(r, e);
    }

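    // `mulhi_epu16` keeps the high half of the unsigned 16x16 -> 32-bit
    // product, i.e. ((a as u32 * b as u32) >> 16) as u16; for 1 * 1 that is 0.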
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhi_epu16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhi_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhi_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhi_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhi_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhi_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhi_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhi_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

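    // Signed variant of the high-half multiply: ((a as i32 * b as i32) >> 16)
    // as i16, which is again 0 for 1 * 1.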
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhi_epi16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhi_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhi_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhi_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

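    // `mulhrs` is the fixed-point rounding multiply (vpmulhrsw). A scalar
    // model of one lane, assuming the usual definition:
    //     fn mulhrs(a: i16, b: i16) -> i16 {
    //         ((((a as i32 * b as i32) >> 14) + 1) >> 1) as i16
    //     }
    // so 1 * 1 rounds down to 0.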
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mulhrs_epi16(a, b);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mulhrs_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mulhrs_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mulhrs_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mulhrs_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mulhrs_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mulhrs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

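    // `mullo` keeps the low, truncated half of the 16x16 -> 32-bit product;
    // 1 * 1 == 1 fits entirely in the low half.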
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mullo_epi16(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mullo_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_mullo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mullo_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mullo_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_mullo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mullo_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_mullo_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mullo_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_mullo_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

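    // Lane-wise unsigned 16-bit maximum: with `a` counting up and `b` counting
    // down, each lane keeps the larger of the two counters.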
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

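    // Lane-wise unsigned maximum on 64 byte lanes, same counter pattern as the
    // epu16 tests above.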
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

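    // Lane-wise signed 16-bit maximum; all inputs here are non-negative, so
    // the expected values match the unsigned tests above.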
11538     #[simd_test(enable = "avx512bw")]
test_mm512_max_epi16()11539     unsafe fn test_mm512_max_epi16() {
11540         #[rustfmt::skip]
11541         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11542                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11543         #[rustfmt::skip]
11544         let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11545                                  15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11546         let r = _mm512_max_epi16(a, b);
11547         #[rustfmt::skip]
11548         let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
11549                                  15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
11550         assert_eq_m512i(r, e);
11551     }
11552 
11553     #[simd_test(enable = "avx512f")]
test_mm512_mask_max_epi16()11554     unsafe fn test_mm512_mask_max_epi16() {
11555         #[rustfmt::skip]
11556         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11557                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11558         #[rustfmt::skip]
11559         let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11560                                  15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11561         let r = _mm512_mask_max_epi16(a, 0, a, b);
11562         assert_eq_m512i(r, a);
11563         let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
11564         #[rustfmt::skip]
11565         let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11566                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11567         assert_eq_m512i(r, e);
11568     }
11569 
11570     #[simd_test(enable = "avx512f")]
test_mm512_maskz_max_epi16()11571     unsafe fn test_mm512_maskz_max_epi16() {
11572         #[rustfmt::skip]
11573         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11574                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11575         #[rustfmt::skip]
11576         let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11577                                  15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11578         let r = _mm512_maskz_max_epi16(0, a, b);
11579         assert_eq_m512i(r, _mm512_setzero_si512());
11580         let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
11581         #[rustfmt::skip]
11582         let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
11583                                  0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
11584         assert_eq_m512i(r, e);
11585     }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                                15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
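        // 64 byte lanes require a 64-bit mask (`__mmask64`); the same
        // 8-zeros/8-ones pattern repeats once per 16-lane row above.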
        let r = _mm512_mask_max_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_max_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_max_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_max_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_max_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_max_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_max_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
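        // Each lane keeps whichever operand is smaller, mirroring the
        // ascending values of `a` against the descending values of `b`.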
        let r = _mm512_min_epu16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epu16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epu16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epu16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epu8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epu8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_min_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                                15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_mask_min_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_min_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_maskz_min_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
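        // Reinterpreted as unsigned 16-bit values, -2 is 0xFFFE and -1 is
        // 0xFFFF, so the unsigned less-than holds in every lane.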
        let m = _mm512_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
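        // The masked compare ANDs the raw comparison result with `mask`, so
        // only lanes whose mask bit is set can report true.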
        let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epu8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epu8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epu8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi16_mask() {
        let a = _mm512_set1_epi16(-2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi16_mask() {
        let a = _mm256_set1_epi16(-2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmplt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi16_mask() {
        let a = _mm_set1_epi16(-2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmplt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmplt_epi8_mask() {
        let a = _mm512_set1_epi8(-2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmplt_epi8_mask() {
        let a = _mm256_set1_epi8(-2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmplt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmplt_epi8_mask() {
        let a = _mm_set1_epi8(-2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpgt_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpgt_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpgt_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
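        // Signed comparison: 2 > -1 in every lane (as unsigned it would fail,
        // since -1 reinterprets to 0xFFFF).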
        let m = _mm512_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpgt_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epi16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpgt_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpgt_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpgt_epi8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
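        // Equal operands satisfy less-than-or-equal in every lane.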
        let m = _mm512_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epu16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epu16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmple_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epu16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmple_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmple_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmple_epu8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmple_epu8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmple_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmple_epu8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmple_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }
12752 
12753     #[simd_test(enable = "avx512bw")]
test_mm512_cmple_epi16_mask()12754     unsafe fn test_mm512_cmple_epi16_mask() {
12755         let a = _mm512_set1_epi16(-1);
12756         let b = _mm512_set1_epi16(-1);
12757         let m = _mm512_cmple_epi16_mask(a, b);
12758         assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12759     }
12760 
12761     #[simd_test(enable = "avx512bw")]
test_mm512_mask_cmple_epi16_mask()12762     unsafe fn test_mm512_mask_cmple_epi16_mask() {
12763         let a = _mm512_set1_epi16(-1);
12764         let b = _mm512_set1_epi16(-1);
12765         let mask = 0b01010101_01010101_01010101_01010101;
12766         let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
12767         assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12768     }
12769 
12770     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm256_cmple_epi16_mask()12771     unsafe fn test_mm256_cmple_epi16_mask() {
12772         let a = _mm256_set1_epi16(-1);
12773         let b = _mm256_set1_epi16(-1);
12774         let m = _mm256_cmple_epi16_mask(a, b);
12775         assert_eq!(m, 0b11111111_11111111);
12776     }
12777 
12778     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm256_mask_cmple_epi16_mask()12779     unsafe fn test_mm256_mask_cmple_epi16_mask() {
12780         let a = _mm256_set1_epi16(-1);
12781         let b = _mm256_set1_epi16(-1);
12782         let mask = 0b01010101_01010101;
12783         let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
12784         assert_eq!(r, 0b01010101_01010101);
12785     }
12786 
12787     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm_cmple_epi16_mask()12788     unsafe fn test_mm_cmple_epi16_mask() {
12789         let a = _mm_set1_epi16(-1);
12790         let b = _mm_set1_epi16(-1);
12791         let m = _mm_cmple_epi16_mask(a, b);
12792         assert_eq!(m, 0b11111111);
12793     }
12794 
12795     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm_mask_cmple_epi16_mask()12796     unsafe fn test_mm_mask_cmple_epi16_mask() {
12797         let a = _mm_set1_epi16(-1);
12798         let b = _mm_set1_epi16(-1);
12799         let mask = 0b01010101;
12800         let r = _mm_mask_cmple_epi16_mask(mask, a, b);
12801         assert_eq!(r, 0b01010101);
12802     }
12803 
12804     #[simd_test(enable = "avx512bw")]
test_mm512_cmple_epi8_mask()12805     unsafe fn test_mm512_cmple_epi8_mask() {
12806         let a = _mm512_set1_epi8(-1);
12807         let b = _mm512_set1_epi8(-1);
12808         let m = _mm512_cmple_epi8_mask(a, b);
12809         assert_eq!(
12810             m,
12811             0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
12812         );
12813     }
12814 
12815     #[simd_test(enable = "avx512bw")]
test_mm512_mask_cmple_epi8_mask()12816     unsafe fn test_mm512_mask_cmple_epi8_mask() {
12817         let a = _mm512_set1_epi8(-1);
12818         let b = _mm512_set1_epi8(-1);
12819         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
12820         let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
12821         assert_eq!(
12822             r,
12823             0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
12824         );
12825     }
12826 
12827     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm256_cmple_epi8_mask()12828     unsafe fn test_mm256_cmple_epi8_mask() {
12829         let a = _mm256_set1_epi8(-1);
12830         let b = _mm256_set1_epi8(-1);
12831         let m = _mm256_cmple_epi8_mask(a, b);
12832         assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12833     }
12834 
12835     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm256_mask_cmple_epi8_mask()12836     unsafe fn test_mm256_mask_cmple_epi8_mask() {
12837         let a = _mm256_set1_epi8(-1);
12838         let b = _mm256_set1_epi8(-1);
12839         let mask = 0b01010101_01010101_01010101_01010101;
12840         let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
12841         assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12842     }
12843 
12844     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm_cmple_epi8_mask()12845     unsafe fn test_mm_cmple_epi8_mask() {
12846         let a = _mm_set1_epi8(-1);
12847         let b = _mm_set1_epi8(-1);
12848         let m = _mm_cmple_epi8_mask(a, b);
12849         assert_eq!(m, 0b11111111_11111111);
12850     }
12851 
12852     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm_mask_cmple_epi8_mask()12853     unsafe fn test_mm_mask_cmple_epi8_mask() {
12854         let a = _mm_set1_epi8(-1);
12855         let b = _mm_set1_epi8(-1);
12856         let mask = 0b01010101_01010101;
12857         let r = _mm_mask_cmple_epi8_mask(mask, a, b);
12858         assert_eq!(r, 0b01010101_01010101);
12859     }
12860 
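    // Greater-than-or-equal comparisons, again with equal operands so that
    // every lane compares true.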
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpge_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpge_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpge_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpge_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpge_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpge_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpge_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpge_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpge_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

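    // Equality comparisons. Signedness does not affect equality, so the epu
    // (unsigned) and epi (signed) variants use the same all-equal pattern.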
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpeq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epu16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpeq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpeq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epu8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpeq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epi16_mask() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpeq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpeq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpeq_epi8_mask() {
        let a = _mm_set1_epi8(-1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

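    // Not-equal comparisons, with operands that differ in every lane.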
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
        let a = _mm512_set1_epi16(2);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmpneq_epu16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epu16_mask() {
        let a = _mm_set1_epi16(2);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmpneq_epu8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmpneq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmpneq_epu8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epu8_mask() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let m = _mm512_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let m = _mm256_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(-1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let m = _mm_cmpneq_epi16_mask(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epi16_mask() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let mask = 0b01010101;
        let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let m = _mm512_cmpneq_epi8_mask(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let m = _mm256_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(-1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let m = _mm_cmpneq_epi8_mask(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmpneq_epi8_mask() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(-1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

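    // Generic comparisons taking the predicate as a const generic parameter;
    // `_MM_CMPINT_LT` is exercised with `a < b` in every lane.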
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epu8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epu8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epu8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi16_mask() {
        let a = _mm512_set1_epi16(0);
        let b = _mm512_set1_epi16(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi16_mask() {
        let a = _mm256_set1_epi16(0);
        let b = _mm256_set1_epi16(1);
        let mask = 0b01010101_01010101;
        let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi16_mask() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let mask = 0b01010101;
        let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(
            m,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
        );
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cmp_epi8_mask() {
        let a = _mm512_set1_epi8(0);
        let b = _mm512_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
        let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(
            r,
            0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
        );
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111_11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cmp_epi8_mask() {
        let a = _mm256_set1_epi8(0);
        let b = _mm256_set1_epi8(1);
        let mask = 0b01010101_01010101_01010101_01010101;
        let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101_01010101_01010101);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
        assert_eq!(m, 0b11111111_11111111);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cmp_epi8_mask() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let mask = 0b01010101_01010101;
        let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
        assert_eq!(r, 0b01010101_01010101);
    }

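    // Unaligned loads. `loadu` reads elements from memory in ascending address
    // order, while `set_epi*` lists its arguments from the highest lane down,
    // hence the reversed sequences in the expected vectors.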
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi16() {
        #[rustfmt::skip]
        let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi16(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi16() {
        let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm256_loadu_epi16(&a[0]);
        let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi16() {
        let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
        let r = _mm_loadu_epi16(&a[0]);
        let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm512_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_loadu_epi8() {
        #[rustfmt::skip]
        let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
        let r = _mm256_loadu_epi8(&a[0]);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_loadu_epi8() {
        let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let r = _mm_loadu_epi8(&a[0]);
        let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        assert_eq_m128i(r, e);
    }

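    // Unaligned stores: write a splatted vector through a raw pointer and check
    // that it round-trips.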
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi16() {
        let a = _mm512_set1_epi16(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi16() {
        let a = _mm256_set1_epi16(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi16() {
        let a = _mm_set1_epi16(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_storeu_epi8() {
        let a = _mm512_set1_epi8(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_storeu_epi8() {
        let a = _mm256_set1_epi8(9);
        let mut r = _mm256_set1_epi32(0);
        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_storeu_epi8() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi32(0);
        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
        assert_eq_m128i(r, a);
    }

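    // `madd_epi16` multiplies adjacent pairs of signed 16-bit lanes and sums
    // each pair into a 32-bit lane, so 1 * 1 + 1 * 1 == 2 per lane. Masked-off
    // lanes keep the source value: two 16-bit lanes holding 1 read as the i32
    // `1 << 16 | 1`.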
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_madd_epi16(a, b);
        let e = _mm512_set1_epi32(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_madd_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_madd_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_madd_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_madd_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm256_set_epi32(
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            1 << 16 | 1,
            2,
            2,
            2,
            2,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_madd_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_madd_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_madd_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_madd_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_madd_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_madd_epi16(0b00001111, a, b);
        let e = _mm_set_epi32(2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

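    // `maddubs_epi16` multiplies the unsigned bytes of `a` by the signed bytes
    // of `b` and sums adjacent pairs into saturated 16-bit lanes, so with both
    // operands splatted to 1 each lane holds 1 * 1 + 1 * 1 == 2.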
13930     #[simd_test(enable = "avx512bw")]
test_mm512_maddubs_epi16()13931     unsafe fn test_mm512_maddubs_epi16() {
13932         let a = _mm512_set1_epi8(1);
13933         let b = _mm512_set1_epi8(1);
13934         let r = _mm512_maddubs_epi16(a, b);
13935         let e = _mm512_set1_epi16(2);
13936         assert_eq_m512i(r, e);
13937     }
13938 
13939     #[simd_test(enable = "avx512bw")]
test_mm512_mask_maddubs_epi16()13940     unsafe fn test_mm512_mask_maddubs_epi16() {
13941         let a = _mm512_set1_epi8(1);
13942         let b = _mm512_set1_epi8(1);
13943         let src = _mm512_set1_epi16(1);
13944         let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
13945         assert_eq_m512i(r, src);
13946         let r = _mm512_mask_add_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
13947         #[rustfmt::skip]
13948         let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13949                                  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<9|2);
13950         assert_eq_m512i(r, e);
13951     }
13952 
13953     #[simd_test(enable = "avx512bw")]
test_mm512_maskz_maddubs_epi16()13954     unsafe fn test_mm512_maskz_maddubs_epi16() {
13955         let a = _mm512_set1_epi8(1);
13956         let b = _mm512_set1_epi8(1);
13957         let r = _mm512_maskz_maddubs_epi16(0, a, b);
13958         assert_eq_m512i(r, _mm512_setzero_si512());
13959         let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
13960         #[rustfmt::skip]
13961         let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
13962                                  0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
13963         assert_eq_m512i(r, e);
13964     }
13965 
13966     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm256_mask_maddubs_epi16()13967     unsafe fn test_mm256_mask_maddubs_epi16() {
13968         let a = _mm256_set1_epi8(1);
13969         let b = _mm256_set1_epi8(1);
13970         let src = _mm256_set1_epi16(1);
13971         let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
13972         assert_eq_m256i(r, src);
13973         let r = _mm256_mask_add_epi16(src, 0b00000000_00000001, a, b);
13974         let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2);
13975         assert_eq_m256i(r, e);
13976     }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_maddubs_epi16() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_maddubs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_maddubs_epi16() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let src = _mm_set1_epi16(1);
        let r = _mm_mask_maddubs_epi16(src, 0, a, b);
        assert_eq_m128i(r, src);
        // Element 0 is selected: maddubs gives 1 * 1 + 1 * 1 = 2; the rest keep src.
        let r = _mm_mask_maddubs_epi16(src, 0b00000001, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_maddubs_epi16() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_maddubs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

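    // The packs tests below exercise signed saturating narrowing: packs_epi32
    // converts each i32 lane to an i16 (and packs_epi16 each i16 to an i8),
    // interleaving `a` and `b` within each 128-bit lane. Per element this is
    // equivalent to the following sketch (`pack_i32_to_i16` is a hypothetical
    // helper, not a stdarch API):
    //
    //     fn pack_i32_to_i16(x: i32) -> i16 {
    //         x.clamp(i16::MIN as i32, i16::MAX as i32) as i16
    //     }
    //
    // Hence i32::MAX saturates to i16::MAX while 1 passes through unchanged.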
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packs_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
                                 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packs_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packs_epi32() {
        let a = _mm512_set1_epi32(i32::MAX);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packs_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packs_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packs_epi32() {
        let a = _mm256_set1_epi32(i32::MAX);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packs_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packs_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi32() {
        let a = _mm_set1_epi32(i32::MAX);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packs_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
                                1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packs_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packs_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packs_epi16() {
        let a = _mm512_set1_epi16(i16::MAX);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packs_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packs_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packs_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packs_epi16() {
        let a = _mm256_set1_epi16(i16::MAX);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packs_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packs_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packs_epi16() {
        let a = _mm_set1_epi16(i16::MAX);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packs_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

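    // The packus tests below are the unsigned counterpart of packs: each
    // signed input lane is clamped to the unsigned range of the narrower
    // type, so -1 packs to 0. A per-element sketch (`pack_i32_to_u16` is a
    // hypothetical helper, not a stdarch API):
    //
    //     fn pack_i32_to_u16(x: i32) -> u16 {
    //         x.clamp(0, u16::MAX as i32) as u16
    //     }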
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_packus_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
                                 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1 << 16 | 1);
        let r = _mm512_mask_packus_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi32() {
        let a = _mm512_set1_epi32(-1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_packus_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1 << 16 | 1);
        let r = _mm256_mask_packus_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_packus_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1 << 16 | 1);
        let r = _mm_mask_packus_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_packus_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi32(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
                                1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1 << 8 | 1);
        let r = _mm512_mask_packus_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_packus_epi16(
            b,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_packus_epi16() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_packus_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_packus_epi16(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1 << 8 | 1);
        let r = _mm256_mask_packus_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_packus_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_packus_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1 << 8 | 1);
        let r = _mm_mask_packus_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_packus_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_packus_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

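    // The avg tests below exercise the unsigned rounding average, computed
    // per lane as (a + b + 1) >> 1 in widened arithmetic, so avg(1, 1) == 1.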
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_avg_epu16(a, b);
        let e = _mm512_set1_epi16(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_mask_avg_epu16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_avg_epu16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_maskz_avg_epu16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_mask_avg_epu16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_avg_epu16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_maskz_avg_epu16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_mask_avg_epu16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_avg_epu16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(1);
        let r = _mm_maskz_avg_epu16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_avg_epu8(a, b);
        let e = _mm512_set1_epi8(1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_avg_epu8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_avg_epu8(
            a,
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_avg_epu8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_avg_epu8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_avg_epu8(
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_avg_epu8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_avg_epu8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_avg_epu8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_avg_epu8(0b00000000_00000000_00000000_00001111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_avg_epu8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
        let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_avg_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_avg_epu8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

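    // The sll/srl/sra tests below pass the shift count as the low 64 bits of
    // a __m128i, not per lane: `_mm_set1_epi16(2)` encodes the 64-bit count
    // 0x0002_0002_0002_0002, which exceeds 15, so every lane is shifted out
    // entirely. Logical shifts then yield 0 and arithmetic shifts yield the
    // sign fill (also 0 for the non-negative inputs used here), which is why
    // the expected vectors are all zeros. The slli tests use a genuine
    // immediate count instead.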
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_sll_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_sll_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sll_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_sll_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_sll_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sll_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_sll_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sll_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sll_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sll_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sll_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_slli_epi16::<1>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_slli_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_maskz_slli_epi16::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_slli_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_maskz_slli_epi16::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_mask_slli_epi16::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_slli_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_maskz_slli_epi16::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

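    // The sllv/srlv/srav tests below use per-lane counts: each lane is
    // shifted by the corresponding element of `count`. Here the count really
    // is 2, so (1 << 15) << 2 overflows the i16 lane to 0 and (1 << 1) >> 2
    // is 0, while the srav tests further down expect 8 >> 2 == 2.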
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_sllv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_sllv_epi16() {
        let a = _mm512_set1_epi16(1 << 15);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_sllv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_sllv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_sllv_epi16() {
        let a = _mm256_set1_epi16(1 << 15);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_sllv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_sllv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_sllv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_sllv_epi16() {
        let a = _mm_set1_epi16(1 << 15);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_sllv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_srl_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_mask_srl_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srl_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm512_maskz_srl_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm256_mask_srl_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srl_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm256_maskz_srl_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srl_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srl_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srl_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_srli_epi16::<2>(a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srli_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let r = _mm512_maskz_srli_epi16::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srli_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let r = _mm256_maskz_srli_epi16::<2>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_mask_srli_epi16::<2>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srli_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let r = _mm_maskz_srli_epi16::<2>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srlv_epi16(a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srlv_epi16() {
        let a = _mm512_set1_epi16(1 << 1);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srlv_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srlv_epi16(a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srlv_epi16() {
        let a = _mm256_set1_epi16(1 << 1);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srlv_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_srlv_epi16(a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srlv_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srlv_epi16() {
        let a = _mm_set1_epi16(1 << 1);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srlv_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(0);
        assert_eq_m128i(r, e);
    }

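    // The sra tests below again use the oversized low-64-bit count (see the
    // note above the sll tests): with a per-element count of 1, 8 >> 1 would
    // be 4, but the encoded count 0x0001_0001_0001_0001 shifts the positive
    // inputs down to 0. The srai/srav tests that follow use real counts and
    // expect 8 >> 2 == 2.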
15025     #[simd_test(enable = "avx512bw")]
test_mm512_sra_epi16()15026     unsafe fn test_mm512_sra_epi16() {
15027         let a = _mm512_set1_epi16(8);
15028         let count = _mm_set1_epi16(1);
15029         let r = _mm512_sra_epi16(a, count);
15030         let e = _mm512_set1_epi16(0);
15031         assert_eq_m512i(r, e);
15032     }
15033 
15034     #[simd_test(enable = "avx512bw")]
test_mm512_mask_sra_epi16()15035     unsafe fn test_mm512_mask_sra_epi16() {
15036         let a = _mm512_set1_epi16(8);
15037         let count = _mm_set1_epi16(1);
15038         let r = _mm512_mask_sra_epi16(a, 0, a, count);
15039         assert_eq_m512i(r, a);
15040         let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
15041         let e = _mm512_set1_epi16(0);
15042         assert_eq_m512i(r, e);
15043     }
15044 
15045     #[simd_test(enable = "avx512bw")]
test_mm512_maskz_sra_epi16()15046     unsafe fn test_mm512_maskz_sra_epi16() {
15047         let a = _mm512_set1_epi16(8);
15048         let count = _mm_set1_epi16(1);
15049         let r = _mm512_maskz_sra_epi16(0, a, count);
15050         assert_eq_m512i(r, _mm512_setzero_si512());
15051         let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
15052         let e = _mm512_set1_epi16(0);
15053         assert_eq_m512i(r, e);
15054     }
15055 
15056     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm256_mask_sra_epi16()15057     unsafe fn test_mm256_mask_sra_epi16() {
15058         let a = _mm256_set1_epi16(8);
15059         let count = _mm_set1_epi16(1);
15060         let r = _mm256_mask_sra_epi16(a, 0, a, count);
15061         assert_eq_m256i(r, a);
15062         let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
15063         let e = _mm256_set1_epi16(0);
15064         assert_eq_m256i(r, e);
15065     }
15066 
15067     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm256_maskz_sra_epi16()15068     unsafe fn test_mm256_maskz_sra_epi16() {
15069         let a = _mm256_set1_epi16(8);
15070         let count = _mm_set1_epi16(1);
15071         let r = _mm256_maskz_sra_epi16(0, a, count);
15072         assert_eq_m256i(r, _mm256_setzero_si256());
15073         let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
15074         let e = _mm256_set1_epi16(0);
15075         assert_eq_m256i(r, e);
15076     }
15077 
15078     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm_mask_sra_epi16()15079     unsafe fn test_mm_mask_sra_epi16() {
15080         let a = _mm_set1_epi16(8);
15081         let count = _mm_set1_epi16(1);
15082         let r = _mm_mask_sra_epi16(a, 0, a, count);
15083         assert_eq_m128i(r, a);
15084         let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
15085         let e = _mm_set1_epi16(0);
15086         assert_eq_m128i(r, e);
15087     }
15088 
15089     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm_maskz_sra_epi16()15090     unsafe fn test_mm_maskz_sra_epi16() {
15091         let a = _mm_set1_epi16(8);
15092         let count = _mm_set1_epi16(1);
15093         let r = _mm_maskz_sra_epi16(0, a, count);
15094         assert_eq_m128i(r, _mm_setzero_si128());
15095         let r = _mm_maskz_sra_epi16(0b11111111, a, count);
15096         let e = _mm_set1_epi16(0);
15097         assert_eq_m128i(r, e);
15098     }
15099 
15100     #[simd_test(enable = "avx512bw")]
test_mm512_srai_epi16()15101     unsafe fn test_mm512_srai_epi16() {
15102         let a = _mm512_set1_epi16(8);
15103         let r = _mm512_srai_epi16::<2>(a);
15104         let e = _mm512_set1_epi16(2);
15105         assert_eq_m512i(r, e);
15106     }
15107 
15108     #[simd_test(enable = "avx512bw")]
test_mm512_mask_srai_epi16()15109     unsafe fn test_mm512_mask_srai_epi16() {
15110         let a = _mm512_set1_epi16(8);
15111         let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
15112         assert_eq_m512i(r, a);
15113         let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
15114         let e = _mm512_set1_epi16(2);
15115         assert_eq_m512i(r, e);
15116     }
15117 
15118     #[simd_test(enable = "avx512bw")]
test_mm512_maskz_srai_epi16()15119     unsafe fn test_mm512_maskz_srai_epi16() {
15120         let a = _mm512_set1_epi16(8);
15121         let r = _mm512_maskz_srai_epi16::<2>(0, a);
15122         assert_eq_m512i(r, _mm512_setzero_si512());
15123         let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
15124         let e = _mm512_set1_epi16(2);
15125         assert_eq_m512i(r, e);
15126     }
15127 
15128     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm256_mask_srai_epi16()15129     unsafe fn test_mm256_mask_srai_epi16() {
15130         let a = _mm256_set1_epi16(8);
15131         let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
15132         assert_eq_m256i(r, a);
15133         let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
15134         let e = _mm256_set1_epi16(2);
15135         assert_eq_m256i(r, e);
15136     }
15137 
15138     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm256_maskz_srai_epi16()15139     unsafe fn test_mm256_maskz_srai_epi16() {
15140         let a = _mm256_set1_epi16(8);
15141         let r = _mm256_maskz_srai_epi16::<2>(0, a);
15142         assert_eq_m256i(r, _mm256_setzero_si256());
15143         let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
15144         let e = _mm256_set1_epi16(2);
15145         assert_eq_m256i(r, e);
15146     }
15147 
15148     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm_mask_srai_epi16()15149     unsafe fn test_mm_mask_srai_epi16() {
15150         let a = _mm_set1_epi16(8);
15151         let r = _mm_mask_srai_epi16::<2>(a, 0, a);
15152         assert_eq_m128i(r, a);
15153         let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
15154         let e = _mm_set1_epi16(2);
15155         assert_eq_m128i(r, e);
15156     }
15157 
15158     #[simd_test(enable = "avx512bw,avx512vl")]
test_mm_maskz_srai_epi16()15159     unsafe fn test_mm_maskz_srai_epi16() {
15160         let a = _mm_set1_epi16(8);
15161         let r = _mm_maskz_srai_epi16::<2>(0, a);
15162         assert_eq_m128i(r, _mm_setzero_si128());
15163         let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
15164         let e = _mm_set1_epi16(2);
15165         assert_eq_m128i(r, e);
15166     }
15167 
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_srav_epi16(a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_mask_srav_epi16(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_srav_epi16() {
        let a = _mm512_set1_epi16(8);
        let count = _mm512_set1_epi16(2);
        let r = _mm512_maskz_srav_epi16(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_srav_epi16(a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_mask_srav_epi16(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_srav_epi16() {
        let a = _mm256_set1_epi16(8);
        let count = _mm256_set1_epi16(2);
        let r = _mm256_maskz_srav_epi16(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_srav_epi16(a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_mask_srav_epi16(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_srav_epi16() {
        let a = _mm_set1_epi16(8);
        let count = _mm_set1_epi16(2);
        let r = _mm_maskz_srav_epi16(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi16(0b11111111, a, count);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

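    // `permutex2var` indexes a combined table built from `a` and `b`. For 32
    // lanes the low five index bits select the element and bit 5 (hence the
    // `1 << 5` entries) switches the source from `a` to `b`; the 16- and
    // 8-lane vectors below use `1 << 4` and `1 << 3` for the same purpose.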
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_permutex2var_epi16(a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

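    // The `mask2` form differs from `mask` in that masked-off lanes keep the
    // value from `idx` rather than from `a`, which is what the first assert
    // below checks.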
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask2_permutex2var_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        #[rustfmt::skip]
        let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
                                   9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
        let b = _mm512_set1_epi16(100);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
            22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_permutex2var_epi16(a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask2_permutex2var_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
        let b = _mm256_set1_epi16(100);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_permutex2var_epi16(a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask2_permutex2var_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm_set1_epi16(100);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
        let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m128i(r, e);
    }

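    // `permutexvar` permutes `a` alone. With every index equal to 1, each
    // output lane receives element 1 of `a`, i.e. 30 given `_mm512_set_epi16`'s
    // high-to-low argument order.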
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_permutexvar_epi16(idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_permutexvar_epi16() {
        let idx = _mm512_set1_epi16(1);
        #[rustfmt::skip]
        let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
        let e = _mm512_set1_epi16(30);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_permutexvar_epi16(idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_permutexvar_epi16() {
        let idx = _mm256_set1_epi16(1);
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
        let e = _mm256_set1_epi16(14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_permutexvar_epi16(idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_permutexvar_epi16() {
        let idx = _mm_set1_epi16(1);
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_permutexvar_epi16(0, idx, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
        let e = _mm_set1_epi16(6);
        assert_eq_m128i(r, e);
    }

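    // `mask_blend` takes no `src` operand: lanes with a set mask bit come from
    // `b`, the rest from `a`.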
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
        let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_blend_epi16(0b11110000, a, b);
        let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_blend_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_blend_epi8(
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_blend_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                                2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_blend_epi8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(2);
        let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
        let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

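    // `broadcastw` replicates the lowest 16-bit element of `a` (here 24, the
    // last `_mm_set_epi16` argument) into every destination lane.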
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_broadcastw_epi16(a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastw_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm512_maskz_broadcastw_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(24);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastw_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_broadcastw_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastw_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_mask_broadcastw_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastw_epi16() {
        let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm_maskz_broadcastw_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
        let e = _mm_set1_epi16(24);
        assert_eq_m128i(r, e);
    }

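    // `broadcastb` is the byte analogue: the lowest byte of `a` (32) fills
    // every destination lane.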
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_broadcastb_epi8(a);
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_broadcastb_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastb_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_broadcastb_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastb_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_broadcastb_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_maskz_broadcastb_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_broadcastb_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_mask_broadcastb_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_broadcastb_epi8() {
        let a = _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm_maskz_broadcastb_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(32);
        assert_eq_m128i(r, e);
    }

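    // The unpack tests interleave within each 128-bit lane: `unpackhi_epi16`
    // pairs the upper four words of `a` and `b` lane by lane, which is why the
    // expected values alternate between the two sources.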
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpackhi_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(33, 1,  34, 2,  35, 3,  36, 4,  41, 9,  42, 10, 43, 11, 44, 12,
                                 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpackhi_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
        assert_eq_m128i(r, e);
    }

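    // Same interleaving for bytes: `unpackhi_epi8` pairs the upper eight bytes
    // of each 128-bit lane of `a` and `b`.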
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                                97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                                113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(65, 1,  66, 2,  67, 3,  68, 4,  69, 5,  70, 6,  71, 7,  72, 8,
                                81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpackhi_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpackhi_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
        assert_eq_m128i(r, e);
    }

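    // `unpacklo_epi16` mirrors `unpackhi_epi16`, drawing from the lower four
    // words of each 128-bit lane instead.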
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_unpacklo_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpacklo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        let r = _mm512_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(37, 5,  38, 6,  39, 7,  40, 8,  45, 13, 46, 14, 47, 15, 48, 16,
                                 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi16() {
        let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm256_set_epi16(
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
        );
        let r = _mm256_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
        let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpacklo_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpacklo_epi16() {
        let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
        let r = _mm_maskz_unpacklo_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
        let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
        assert_eq_m128i(r, e);
    }

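    // And the byte version of the low-half interleave.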
16102     #[simd_test(enable = "avx512bw")]
test_mm512_unpacklo_epi8()16103     unsafe fn test_mm512_unpacklo_epi8() {
16104         #[rustfmt::skip]
16105         let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
16106                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16107                                 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16108                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16109         #[rustfmt::skip]
16110         let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
16111                                 81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
16112                                 97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
16113                                 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
16114         let r = _mm512_unpacklo_epi8(a, b);
16115         #[rustfmt::skip]
16116         let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
16117                                 89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
16118                                 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
16119                                 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
16120         assert_eq_m512i(r, e);
16121     }
16122 
16123     #[simd_test(enable = "avx512bw")]
test_mm512_mask_unpacklo_epi8()16124     unsafe fn test_mm512_mask_unpacklo_epi8() {
16125         #[rustfmt::skip]
16126         let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
16127                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16128                                 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16129                                 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16130         #[rustfmt::skip]
16131         let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
16132                                 81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
16133                                 97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
16134                                 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
16135         let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
16136         assert_eq_m512i(r, a);
16137         let r = _mm512_mask_unpacklo_epi8(
16138             a,
16139             0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
16140             a,
16141             b,
16142         );
16143         #[rustfmt::skip]
16144         let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
16145                                 89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
16146                                 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
16147                                 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
16148         assert_eq_m512i(r, e);
16149     }
16150 
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
        #[rustfmt::skip]
        let b = _mm512_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,
                                97,  98,  99,  100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                                113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
        let r = _mm512_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32,
                                105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
                                121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0,   64);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
        #[rustfmt::skip]
        let b = _mm256_set_epi8(65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,
                                81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96);
        let r = _mm256_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(73,  9,  74,  10, 75,  11, 76,  12, 77,  13, 78,  14, 79,  15, 80,  16,
                                89,  25, 90,  26, 91,  27, 92,  28, 93,  29, 94,  30, 95,  31, 96,  32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_unpacklo_epi8() {
        let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_set_epi8(
            65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
        );
        let r = _mm_maskz_unpacklo_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
        );
        assert_eq_m128i(r, e);
    }

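    // mask_mov/maskz_mov copy `a` where the corresponding mask bit is set and
    // fall back to `src` (or zero) elsewhere, so an all-zeros mask returns the
    // fallback unchanged and an all-ones mask returns `a` verbatim.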
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mov_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_mov_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mov_epi16() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_mov_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mov_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_mov_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mov_epi16() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_mov_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mov_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_mov_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi16(src, 0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mov_epi16() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_mov_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi16(0b11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_mov_epi8() {
        let src = _mm512_set1_epi8(1);
        let a = _mm512_set1_epi8(2);
        let r = _mm512_mask_mov_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_mov_epi8() {
        let a = _mm512_set1_epi8(2);
        let r = _mm512_maskz_mov_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_mov_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm256_mask_mov_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_mov_epi8() {
        let a = _mm256_set1_epi8(2);
        let r = _mm256_maskz_mov_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_mov_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_mov_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_mov_epi8() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_mov_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
        assert_eq_m128i(r, a);
    }

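    // The masked set1 intrinsics broadcast a scalar into the selected lanes
    // only; unselected lanes keep `src` (mask variant) or become zero (maskz).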
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_set1_epi16() {
        let src = _mm512_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm512_mask_set1_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm512_maskz_set1_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_set1_epi16() {
        let src = _mm256_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm256_mask_set1_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm256_maskz_set1_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_set1_epi16() {
        let src = _mm_set1_epi16(2);
        let a: i16 = 11;
        let r = _mm_mask_set1_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_set1_epi16() {
        let a: i16 = 11;
        let r = _mm_maskz_set1_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi16(0b11111111, a);
        let e = _mm_set1_epi16(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_set1_epi8() {
        let src = _mm512_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm512_mask_set1_epi8(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi8(
            src,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm512_maskz_set1_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm512_set1_epi8(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_set1_epi8() {
        let src = _mm256_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm256_mask_set1_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm256_maskz_set1_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_set1_epi8() {
        let src = _mm_set1_epi8(2);
        let a: i8 = 11;
        let r = _mm_mask_set1_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_set1_epi8() {
        let a: i8 = 11;
        let r = _mm_maskz_set1_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(11);
        assert_eq_m128i(r, e);
    }

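    // shufflelo_epi16 rewrites the low four 16-bit words of every 128-bit
    // lane: word i of the low half is replaced by the word selected by bits
    // [2i+1:2i] of IMM8, while the high four words pass through unchanged.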
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
            16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflelo_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflelo_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
        assert_eq_m128i(r, e);
    }

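    // shufflehi_epi16 is the mirror image: it permutes the high four words of
    // each 128-bit lane via the same 2-bit IMM8 fields and leaves the low
    // four words untouched.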
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
            a,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r =
            _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(
            3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
            19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shufflehi_epi16() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
        let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shufflehi_epi16() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
        let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

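    // shuffle_epi8 (vpshufb) looks every result byte up inside its own
    // 16-byte lane: byte i becomes lane[b[i] & 0x0f], or zero when bit 7 of
    // b[i] is set. With b == 1 everywhere, each lane is filled with that
    // lane's byte at index 1.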
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_shuffle_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi8(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                                32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
                                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_shuffle_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi8(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
                                46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
                                62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_shuffle_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                                30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_shuffle_epi8() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_shuffle_epi8(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_shuffle_epi8(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(
            14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
        );
        assert_eq_m128i(r, e);
    }

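    // test_epi*_mask computes `a & b` per element and sets the corresponding
    // mask bit when the result is non-zero (vptestm); here every element
    // shares bit 0, so all mask bits come back set.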
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_test_epi16_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_test_epi16_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_test_epi16_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_test_epi8_mask(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_test_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_test_epi8_mask(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_test_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_test_epi8_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_test_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

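    // testn_epi*_mask is the negated form (vptestnm): a mask bit is set only
    // when `a & b` is zero, so the same inputs now produce an all-zeros mask.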
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi16_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi16_mask() {
        let a = _mm512_set1_epi16(1 << 0);
        let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi16_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi16_mask() {
        let a = _mm256_set1_epi16(1 << 0);
        let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_testn_epi16_mask(a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi16_mask() {
        let a = _mm_set1_epi16(1 << 0);
        let b = _mm_set1_epi16(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi16_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi8_mask(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_testn_epi8_mask() {
        let a = _mm512_set1_epi8(1 << 0);
        let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm512_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi8_mask(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_testn_epi8_mask(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_testn_epi8_mask() {
        let a = _mm256_set1_epi8(1 << 0);
        let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm256_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_testn_epi8_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_testn_epi8_mask() {
        let a = _mm_set1_epi8(1 << 0);
        let b = _mm_set1_epi8(1 << 0 | 1 << 1);
        let r = _mm_mask_testn_epi8_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

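    // _store_mask64/_load_mask32 and friends are plain 64-/32-bit moves of a
    // k-register to and from memory; the value must round-trip bit-exactly.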
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask64() {
        let a: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask64(&mut r as *mut _ as *mut u64, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_store_mask32() {
        let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let mut r = 0;
        _store_mask32(&mut r as *mut _ as *mut u32, a);
        assert_eq!(r, a);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask64() {
        let p: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        let r = _load_mask64(&p);
        let e: __mmask64 =
            0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_load_mask32() {
        let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
        let r = _load_mask32(&p);
        let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
        assert_eq!(r, e);
    }

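    // sad_epu8 sums absolute byte differences over each 8-byte group, giving
    // |2 - 4| * 8 = 16 per 64-bit result. dbsad_epu8 forms four-byte SADs of
    // quadruplets selected by IMM8; with IMM8 == 0 and uniform inputs every
    // 16-bit result is |2 - 4| * 4 = 8.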
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_sad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_sad_epu8(a, b);
        let e = _mm512_set1_epi64(16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_dbsad_epu8::<0>(a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_dbsad_epu8() {
        let src = _mm512_set1_epi16(1);
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_dbsad_epu8() {
        let a = _mm512_set1_epi8(2);
        let b = _mm512_set1_epi8(4);
        let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
        let e = _mm512_set1_epi16(8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_dbsad_epu8::<0>(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_dbsad_epu8() {
        let src = _mm256_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_dbsad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_dbsad_epu8::<0>(a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_dbsad_epu8() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_dbsad_epu8() {
        let a = _mm_set1_epi8(2);
        let b = _mm_set1_epi8(4);
        let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
        let e = _mm_set1_epi16(8);
        assert_eq_m128i(r, e);
    }

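    // movepi16_mask/movepi8_mask gather the sign (most significant) bit of
    // every element into a k-mask, so elements of 1 << 15 (or 1 << 7) yield
    // an all-ones mask.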
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi16_mask() {
        let a = _mm512_set1_epi16(1 << 15);
        let r = _mm512_movepi16_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi16_mask() {
        let a = _mm256_set1_epi16(1 << 15);
        let r = _mm256_movepi16_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi16_mask() {
        let a = _mm_set1_epi16(1 << 15);
        let r = _mm_movepi16_mask(a);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movepi8_mask() {
        let a = _mm512_set1_epi8(1 << 7);
        let r = _mm512_movepi8_mask(a);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movepi8_mask() {
        let a = _mm256_set1_epi8(1 << 7);
        let r = _mm256_movepi8_mask(a);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movepi8_mask() {
        let a = _mm_set1_epi8(1 << 7);
        let r = _mm_movepi8_mask(a);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

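    // movm_epi16/movm_epi8 broadcast each mask bit across a whole element: a
    // set bit becomes an element with all bits set, i.e. -1 in two's
    // complement (the expected values below spell that out as set1(-1)).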
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movm_epi16() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi16(a);
        let e = _mm512_set1_epi16(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movm_epi16() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm256_movm_epi16(a);
        let e = _mm256_set1_epi16(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movm_epi16() {
        let a: __mmask8 = 0b11111111;
        let r = _mm_movm_epi16(a);
        let e = _mm_set1_epi16(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_movm_epi8() {
        let a: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        let r = _mm512_movm_epi8(a);
        let e = _mm512_set1_epi8(-1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_movm_epi8() {
        let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
        let r = _mm256_movm_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_movm_epi8() {
        let a: __mmask16 = 0b11111111_11111111;
        let r = _mm_movm_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

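    // The _k*_mask32/_k*_mask64 intrinsics treat masks as plain integers:
    // kadd is ordinary addition (11 + 22 == 33) and kand/kandn/knot/kor/
    // kxor/kxnor are the corresponding bitwise operations on the k-register.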
17355     #[simd_test(enable = "avx512bw")]
test_kadd_mask32()17356     unsafe fn test_kadd_mask32() {
17357         let a: __mmask32 = 11;
17358         let b: __mmask32 = 22;
17359         let r = _kadd_mask32(a, b);
17360         let e: __mmask32 = 33;
17361         assert_eq!(r, e);
17362     }
17363 
17364     #[simd_test(enable = "avx512bw")]
test_kadd_mask64()17365     unsafe fn test_kadd_mask64() {
17366         let a: __mmask64 = 11;
17367         let b: __mmask64 = 22;
17368         let r = _kadd_mask64(a, b);
17369         let e: __mmask64 = 33;
17370         assert_eq!(r, e);
17371     }
17372 
17373     #[simd_test(enable = "avx512bw")]
test_kand_mask32()17374     unsafe fn test_kand_mask32() {
17375         let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
17376         let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
17377         let r = _kand_mask32(a, b);
17378         let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
17379         assert_eq!(r, e);
17380     }
17381 
17382     #[simd_test(enable = "avx512bw")]
test_kand_mask64()17383     unsafe fn test_kand_mask64() {
17384         let a: __mmask64 =
17385             0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
17386         let b: __mmask64 =
17387             0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
17388         let r = _kand_mask64(a, b);
17389         let e: __mmask64 =
17390             0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
17391         assert_eq!(r, e);
17392     }
17393 
17394     #[simd_test(enable = "avx512bw")]
test_knot_mask32()17395     unsafe fn test_knot_mask32() {
17396         let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
17397         let r = _knot_mask32(a);
17398         let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
17399         assert_eq!(r, e);
17400     }
17401 
17402     #[simd_test(enable = "avx512bw")]
test_knot_mask64()17403     unsafe fn test_knot_mask64() {
17404         let a: __mmask64 =
17405             0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
17406         let r = _knot_mask64(a);
17407         let e: __mmask64 =
17408             0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
17409         assert_eq!(r, e);
17410     }
17411 
17412     #[simd_test(enable = "avx512bw")]
test_kandn_mask32()17413     unsafe fn test_kandn_mask32() {
        let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kandn_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kandn_mask64() {
        let a: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kandn_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask32() {
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxor_mask32(a, b);
        let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxor_mask64(a, b);
        let e: __mmask64 =
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask32() {
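        // XNOR of two complementary masks clears every bit.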
        let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
        let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
        let r = _kxnor_mask32(a, b);
        let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_kxnor_mask64() {
        let a: __mmask64 =
            0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
        let b: __mmask64 =
            0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
        let r = _kxnor_mask64(a, b);
        let e: __mmask64 =
            0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_cvtepi16_epi8(a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_epi8() {
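        // Writemask pattern used throughout: an all-zero mask must return src
        // unchanged, and a full mask must return the converted result.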
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(2);
        let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi16_epi8() {
        let a = _mm512_set1_epi16(2);
        let r = _mm512_maskz_cvtepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_cvtepi16_epi8(a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(2);
        let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi16_epi8() {
        let a = _mm256_set1_epi16(2);
        let r = _mm256_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtepi16_epi8() {
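        // The 128-bit source holds only 8 words, so the truncated bytes fill the
        // low half of the result and the upper 8 bytes are zeroed.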
        let a = _mm_set1_epi16(2);
        let r = _mm_cvtepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(2);
        let r = _mm_mask_cvtepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi16_epi8() {
        let a = _mm_set1_epi16(2);
        let r = _mm_maskz_cvtepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtsepi16_epi8() {
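        // Signed saturation clamps i16::MAX (32767) into the i8 range, giving i8::MAX.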
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_cvtsepi16_epi8(a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_cvtsepi16_epi8(a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let r = _mm256_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_cvtsepi16_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtsepi16_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let r = _mm_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let r = _mm512_maskz_cvtsepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtusepi16_epi8() {
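        // Unsigned saturation reinterprets i16::MIN (0x8000) as 32768, which
        // clamps to u8::MAX (0xFF, i.e. -1 when read back as i8).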
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_cvtusepi16_epi8(a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_epi8() {
        let src = _mm256_set1_epi8(1);
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
        let a = _mm512_set1_epi16(i16::MIN);
        let r = _mm512_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
        let e = _mm256_set1_epi8(-1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_cvtusepi16_epi8(a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_epi8() {
        let src = _mm_set1_epi8(1);
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
        let a = _mm256_set1_epi16(i16::MIN);
        let r = _mm256_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
        let e = _mm_set1_epi8(-1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_cvtusepi16_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_epi8() {
        let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtusepi16_epi8() {
        let a = _mm_set1_epi16(i16::MIN);
        let r = _mm_maskz_cvtusepi16_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepi8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepi8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepi8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepi8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepi8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepi8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepi8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_cvtepu8_epi16(a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepu8_epi16() {
        let src = _mm512_set1_epi16(1);
        let a = _mm256_set1_epi8(2);
        let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_cvtepu8_epi16() {
        let a = _mm256_set1_epi8(2);
        let r = _mm512_maskz_cvtepu8_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
        let e = _mm512_set1_epi16(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepu8_epi16() {
        let src = _mm256_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm256_maskz_cvtepu8_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
        let e = _mm256_set1_epi16(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepu8_epi16() {
        let src = _mm_set1_epi16(1);
        let a = _mm_set1_epi8(2);
        let r = _mm_mask_cvtepu8_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_cvtepu8_epi16() {
        let a = _mm_set1_epi8(2);
        let r = _mm_maskz_cvtepu8_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
        let e = _mm_set1_epi16(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bslli_epi128() {
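        // bslli shifts bytes left within each 128-bit lane independently; here a
        // 9-byte shift leaves only the 1 that started at lane offset 3, now at
        // offset 12, and shifts the other three 1s out of the lane.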
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let r = _mm512_bslli_epi128::<9>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_bsrli_epi128() {
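        // bsrli shifts bytes right within each 128-bit lane; the top 3 bytes of
        // every lane are zero-filled.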
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
        );
        let r = _mm512_bsrli_epi128::<3>(a);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
            0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
            0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
            0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_alignr_epi8() {
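        // alignr concatenates each 128-bit lane pair (a:b) and shifts the 32-byte
        // pair right by 14 bytes, so a result lane's low two bytes come from the
        // top of b's lane and the rest from the low 14 bytes of a's lane.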
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_alignr_epi8::<14>(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi8::<14>(
            a,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm512_set1_epi8(1);
        let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi8::<14>(
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_set_epi8(
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
            1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
        );
        let b = _mm256_set1_epi8(1);
        let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
            0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_alignr_epi8() {
        let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
        let b = _mm_set1_epi8(1);
        let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
        let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
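        // The full 32-bit mask writes every byte of the destination, so the
        // undefined initial contents cannot leak into the comparison.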
        _mm512_mask_cvtsepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(i8::MAX);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
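        // Only the low 8 bytes are written (one per word element), so the
        // destination is zeroed here and its upper half must stay 0.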
        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(8);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(8);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
        let a = _mm_set1_epi16(8);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
        let a = _mm512_set1_epi16(i16::MAX);
        let mut r = _mm256_undefined_si256();
        _mm512_mask_cvtusepi16_storeu_epi8(
            &mut r as *mut _ as *mut i8,
            0b11111111_11111111_11111111_11111111,
            a,
        );
        let e = _mm256_set1_epi8(u8::MAX as i8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
        let a = _mm256_set1_epi16(i16::MAX);
        let mut r = _mm_undefined_si128();
        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
        let a = _mm_set1_epi16(i16::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }
}