1 use crate::{
2     core_arch::{simd::*, simd_llvm::*, x86::*},
3     mem::transmute,
4 };
5 
6 #[cfg(test)]
7 use stdarch_test::assert_instr;
8 
9 /// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
10 ///
11 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmw_epi32&expand=553)
12 #[inline]
13 #[target_feature(enable = "avx512cd")]
14 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
_mm512_broadcastmw_epi32(k: __mmask16) -> __m512i15 pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
16     _mm512_set1_epi32(k as i32)
17 }
18 
19 /// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
20 ///
21 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmw_epi32&expand=552)
22 #[inline]
23 #[target_feature(enable = "avx512cd,avx512vl")]
24 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
_mm256_broadcastmw_epi32(k: __mmask16) -> __m256i25 pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
26     _mm256_set1_epi32(k as i32)
27 }
28 
29 /// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
30 ///
31 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmw_epi32&expand=551)
32 #[inline]
33 #[target_feature(enable = "avx512cd,avx512vl")]
34 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
_mm_broadcastmw_epi32(k: __mmask16) -> __m128i35 pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
36     _mm_set1_epi32(k as i32)
37 }
38 
39 /// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
40 ///
41 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmb_epi64&expand=550)
42 #[inline]
43 #[target_feature(enable = "avx512cd")]
44 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
_mm512_broadcastmb_epi64(k: __mmask8) -> __m512i45 pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
46     _mm512_set1_epi64(k as i64)
47 }
48 
49 /// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
50 ///
51 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmb_epi64&expand=549)
52 #[inline]
53 #[target_feature(enable = "avx512cd,avx512vl")]
54 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
_mm256_broadcastmb_epi64(k: __mmask8) -> __m256i55 pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
56     _mm256_set1_epi64x(k as i64)
57 }
58 
59 /// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
60 ///
61 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmb_epi64&expand=548)
62 #[inline]
63 #[target_feature(enable = "avx512cd,avx512vl")]
64 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
_mm_broadcastmb_epi64(k: __mmask8) -> __m128i65 pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
66     _mm_set1_epi64x(k as i64)
67 }
68 
69 /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
70 ///
71 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi32&expand=1248)
72 #[inline]
73 #[target_feature(enable = "avx512cd")]
74 #[cfg_attr(test, assert_instr(vpconflictd))]
_mm512_conflict_epi32(a: __m512i) -> __m512i75 pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
76     transmute(vpconflictd(a.as_i32x16()))
77 }
78 
79 /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
80 ///
81 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi32&expand=1249)
82 #[inline]
83 #[target_feature(enable = "avx512cd")]
84 #[cfg_attr(test, assert_instr(vpconflictd))]
_mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i85 pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
86     let conflict = _mm512_conflict_epi32(a).as_i32x16();
87     transmute(simd_select_bitmask(k, conflict, src.as_i32x16()))
88 }
89 
90 /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
91 ///
92 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi32&expand=1250)
93 #[inline]
94 #[target_feature(enable = "avx512cd")]
95 #[cfg_attr(test, assert_instr(vpconflictd))]
_mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i96 pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
97     let conflict = _mm512_conflict_epi32(a).as_i32x16();
98     let zero = _mm512_setzero_si512().as_i32x16();
99     transmute(simd_select_bitmask(k, conflict, zero))
100 }
101 
102 /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
103 ///
104 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi32&expand=1245)
105 #[inline]
106 #[target_feature(enable = "avx512cd,avx512vl")]
107 #[cfg_attr(test, assert_instr(vpconflictd))]
_mm256_conflict_epi32(a: __m256i) -> __m256i108 pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
109     transmute(vpconflictd256(a.as_i32x8()))
110 }
111 
112 /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
113 ///
114 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi32&expand=1246)
115 #[inline]
116 #[target_feature(enable = "avx512cd,avx512vl")]
117 #[cfg_attr(test, assert_instr(vpconflictd))]
_mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i118 pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
119     let conflict = _mm256_conflict_epi32(a).as_i32x8();
120     transmute(simd_select_bitmask(k, conflict, src.as_i32x8()))
121 }
122 
123 /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
124 ///
125 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi32&expand=1247)
126 #[inline]
127 #[target_feature(enable = "avx512cd,avx512vl")]
128 #[cfg_attr(test, assert_instr(vpconflictd))]
_mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i129 pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
130     let conflict = _mm256_conflict_epi32(a).as_i32x8();
131     let zero = _mm256_setzero_si256().as_i32x8();
132     transmute(simd_select_bitmask(k, conflict, zero))
133 }
134 
135 /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
136 ///
137 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi32&expand=1242)
138 #[inline]
139 #[target_feature(enable = "avx512cd,avx512vl")]
140 #[cfg_attr(test, assert_instr(vpconflictd))]
_mm_conflict_epi32(a: __m128i) -> __m128i141 pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
142     transmute(vpconflictd128(a.as_i32x4()))
143 }
144 
145 /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
146 ///
147 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi32&expand=1243)
148 #[inline]
149 #[target_feature(enable = "avx512cd,avx512vl")]
150 #[cfg_attr(test, assert_instr(vpconflictd))]
_mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i151 pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
152     let conflict = _mm_conflict_epi32(a).as_i32x4();
153     transmute(simd_select_bitmask(k, conflict, src.as_i32x4()))
154 }
155 
156 /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
157 ///
158 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi32&expand=1244)
159 #[inline]
160 #[target_feature(enable = "avx512cd,avx512vl")]
161 #[cfg_attr(test, assert_instr(vpconflictd))]
_mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i162 pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
163     let conflict = _mm_conflict_epi32(a).as_i32x4();
164     let zero = _mm_setzero_si128().as_i32x4();
165     transmute(simd_select_bitmask(k, conflict, zero))
166 }
167 
168 /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
169 ///
170 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi64&expand=1257)
171 #[inline]
172 #[target_feature(enable = "avx512cd")]
173 #[cfg_attr(test, assert_instr(vpconflictq))]
_mm512_conflict_epi64(a: __m512i) -> __m512i174 pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
175     transmute(vpconflictq(a.as_i64x8()))
176 }
177 
178 /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
179 ///
180 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi64&expand=1258)
181 #[inline]
182 #[target_feature(enable = "avx512cd")]
183 #[cfg_attr(test, assert_instr(vpconflictq))]
_mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i184 pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
185     let conflict = _mm512_conflict_epi64(a).as_i64x8();
186     transmute(simd_select_bitmask(k, conflict, src.as_i64x8()))
187 }
188 
189 /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
190 ///
191 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi64&expand=1259)
192 #[inline]
193 #[target_feature(enable = "avx512cd")]
194 #[cfg_attr(test, assert_instr(vpconflictq))]
_mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i195 pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
196     let conflict = _mm512_conflict_epi64(a).as_i64x8();
197     let zero = _mm512_setzero_si512().as_i64x8();
198     transmute(simd_select_bitmask(k, conflict, zero))
199 }
200 
201 /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
202 ///
203 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi64&expand=1254)
204 #[inline]
205 #[target_feature(enable = "avx512cd,avx512vl")]
206 #[cfg_attr(test, assert_instr(vpconflictq))]
_mm256_conflict_epi64(a: __m256i) -> __m256i207 pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
208     transmute(vpconflictq256(a.as_i64x4()))
209 }
210 
211 /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
212 ///
213 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi64&expand=1255)
214 #[inline]
215 #[target_feature(enable = "avx512cd,avx512vl")]
216 #[cfg_attr(test, assert_instr(vpconflictq))]
_mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i217 pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
218     let conflict = _mm256_conflict_epi64(a).as_i64x4();
219     transmute(simd_select_bitmask(k, conflict, src.as_i64x4()))
220 }
221 
222 /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
223 ///
224 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi64&expand=1256)
225 #[inline]
226 #[target_feature(enable = "avx512cd,avx512vl")]
227 #[cfg_attr(test, assert_instr(vpconflictq))]
_mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i228 pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
229     let conflict = _mm256_conflict_epi64(a).as_i64x4();
230     let zero = _mm256_setzero_si256().as_i64x4();
231     transmute(simd_select_bitmask(k, conflict, zero))
232 }
233 
234 /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
235 ///
236 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi64&expand=1251)
237 #[inline]
238 #[target_feature(enable = "avx512cd,avx512vl")]
239 #[cfg_attr(test, assert_instr(vpconflictq))]
_mm_conflict_epi64(a: __m128i) -> __m128i240 pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
241     transmute(vpconflictq128(a.as_i64x2()))
242 }
243 
244 /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
245 ///
246 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi64&expand=1252)
247 #[inline]
248 #[target_feature(enable = "avx512cd,avx512vl")]
249 #[cfg_attr(test, assert_instr(vpconflictq))]
_mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i250 pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
251     let conflict = _mm_conflict_epi64(a).as_i64x2();
252     transmute(simd_select_bitmask(k, conflict, src.as_i64x2()))
253 }
254 
255 /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
256 ///
257 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi64&expand=1253)
258 #[inline]
259 #[target_feature(enable = "avx512cd,avx512vl")]
260 #[cfg_attr(test, assert_instr(vpconflictq))]
_mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i261 pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
262     let conflict = _mm_conflict_epi64(a).as_i64x2();
263     let zero = _mm_setzero_si128().as_i64x2();
264     transmute(simd_select_bitmask(k, conflict, zero))
265 }
266 
267 /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
268 ///
269 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi32&expand=3491)
270 #[inline]
271 #[target_feature(enable = "avx512cd")]
272 #[cfg_attr(test, assert_instr(vplzcntd))]
_mm512_lzcnt_epi32(a: __m512i) -> __m512i273 pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
274     transmute(vplzcntd(a.as_i32x16(), false))
275 }
276 
277 /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
278 ///
279 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi32&expand=3492)
280 #[inline]
281 #[target_feature(enable = "avx512cd")]
282 #[cfg_attr(test, assert_instr(vplzcntd))]
_mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i283 pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
284     let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
285     transmute(simd_select_bitmask(k, zerocount, src.as_i32x16()))
286 }
287 
288 /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
289 ///
290 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi32&expand=3493)
291 #[inline]
292 #[target_feature(enable = "avx512cd")]
293 #[cfg_attr(test, assert_instr(vplzcntd))]
_mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i294 pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
295     let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
296     let zero = _mm512_setzero_si512().as_i32x16();
297     transmute(simd_select_bitmask(k, zerocount, zero))
298 }
299 
300 /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
301 ///
302 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi32&expand=3488)
303 #[inline]
304 #[target_feature(enable = "avx512cd,avx512vl")]
305 #[cfg_attr(test, assert_instr(vplzcntd))]
_mm256_lzcnt_epi32(a: __m256i) -> __m256i306 pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
307     transmute(vplzcntd256(a.as_i32x8(), false))
308 }
309 
310 /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
311 ///
312 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi32&expand=3489)
313 #[inline]
314 #[target_feature(enable = "avx512cd,avx512vl")]
315 #[cfg_attr(test, assert_instr(vplzcntd))]
_mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i316 pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
317     let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
318     transmute(simd_select_bitmask(k, zerocount, src.as_i32x8()))
319 }
320 
321 /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
322 ///
323 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi32&expand=3490)
324 #[inline]
325 #[target_feature(enable = "avx512cd,avx512vl")]
326 #[cfg_attr(test, assert_instr(vplzcntd))]
_mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i327 pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
328     let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
329     let zero = _mm256_setzero_si256().as_i32x8();
330     transmute(simd_select_bitmask(k, zerocount, zero))
331 }
332 
333 /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
334 ///
335 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi32&expand=3485)
336 #[inline]
337 #[target_feature(enable = "avx512cd,avx512vl")]
338 #[cfg_attr(test, assert_instr(vplzcntd))]
_mm_lzcnt_epi32(a: __m128i) -> __m128i339 pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
340     transmute(vplzcntd128(a.as_i32x4(), false))
341 }
342 
343 /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
344 ///
345 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi32&expand=3486)
346 #[inline]
347 #[target_feature(enable = "avx512cd,avx512vl")]
348 #[cfg_attr(test, assert_instr(vplzcntd))]
_mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i349 pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
350     let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
351     transmute(simd_select_bitmask(k, zerocount, src.as_i32x4()))
352 }
353 
354 /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
355 ///
356 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi32&expand=3487)
357 #[inline]
358 #[target_feature(enable = "avx512cd,avx512vl")]
359 #[cfg_attr(test, assert_instr(vplzcntd))]
_mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i360 pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
361     let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
362     let zero = _mm_setzero_si128().as_i32x4();
363     transmute(simd_select_bitmask(k, zerocount, zero))
364 }
365 
366 /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
367 ///
368 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi64&expand=3500)
369 #[inline]
370 #[target_feature(enable = "avx512cd")]
371 #[cfg_attr(test, assert_instr(vplzcntq))]
_mm512_lzcnt_epi64(a: __m512i) -> __m512i372 pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
373     transmute(vplzcntq(a.as_i64x8(), false))
374 }
375 
376 /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
377 ///
378 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi64&expand=3501)
379 #[inline]
380 #[target_feature(enable = "avx512cd")]
381 #[cfg_attr(test, assert_instr(vplzcntq))]
_mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i382 pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
383     let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
384     transmute(simd_select_bitmask(k, zerocount, src.as_i64x8()))
385 }
386 
387 /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
388 ///
389 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi64&expand=3502)
390 #[inline]
391 #[target_feature(enable = "avx512cd")]
392 #[cfg_attr(test, assert_instr(vplzcntq))]
_mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i393 pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
394     let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
395     let zero = _mm512_setzero_si512().as_i64x8();
396     transmute(simd_select_bitmask(k, zerocount, zero))
397 }
398 
399 /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
400 ///
401 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi64&expand=3497)
402 #[inline]
403 #[target_feature(enable = "avx512cd,avx512vl")]
404 #[cfg_attr(test, assert_instr(vplzcntq))]
_mm256_lzcnt_epi64(a: __m256i) -> __m256i405 pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
406     transmute(vplzcntq256(a.as_i64x4(), false))
407 }
408 
409 /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
410 ///
411 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi64&expand=3498)
412 #[inline]
413 #[target_feature(enable = "avx512cd,avx512vl")]
414 #[cfg_attr(test, assert_instr(vplzcntq))]
_mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i415 pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
416     let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
417     transmute(simd_select_bitmask(k, zerocount, src.as_i64x4()))
418 }
419 
420 /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
421 ///
422 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi64&expand=3499)
423 #[inline]
424 #[target_feature(enable = "avx512cd,avx512vl")]
425 #[cfg_attr(test, assert_instr(vplzcntq))]
_mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i426 pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
427     let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
428     let zero = _mm256_setzero_si256().as_i64x4();
429     transmute(simd_select_bitmask(k, zerocount, zero))
430 }
431 
432 /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
433 ///
434 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi64&expand=3494)
435 #[inline]
436 #[target_feature(enable = "avx512cd,avx512vl")]
437 #[cfg_attr(test, assert_instr(vplzcntq))]
_mm_lzcnt_epi64(a: __m128i) -> __m128i438 pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
439     transmute(vplzcntq128(a.as_i64x2(), false))
440 }
441 
442 /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
443 ///
444 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi64&expand=3495)
445 #[inline]
446 #[target_feature(enable = "avx512cd,avx512vl")]
447 #[cfg_attr(test, assert_instr(vplzcntq))]
_mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i448 pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
449     let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
450     transmute(simd_select_bitmask(k, zerocount, src.as_i64x2()))
451 }
452 
453 /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
454 ///
455 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi64&expand=3496)
456 #[inline]
457 #[target_feature(enable = "avx512cd,avx512vl")]
458 #[cfg_attr(test, assert_instr(vplzcntq))]
_mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i459 pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
460     let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
461     let zero = _mm_setzero_si128().as_i64x2();
462     transmute(simd_select_bitmask(k, zerocount, zero))
463 }
464 
// Raw LLVM intrinsic declarations backing the safe wrappers above.
// Link names and signatures must match LLVM exactly — do not alter them.
#[allow(improper_ctypes)]
extern "C" {
    // AVX512CD conflict detection (vpconflictd), per element width / vector size.
    #[link_name = "llvm.x86.avx512.conflict.d.512"]
    fn vpconflictd(a: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.conflict.d.256"]
    fn vpconflictd256(a: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.conflict.d.128"]
    fn vpconflictd128(a: i32x4) -> i32x4;

    // 64-bit element variants (vpconflictq).
    #[link_name = "llvm.x86.avx512.conflict.q.512"]
    fn vpconflictq(a: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.conflict.q.256"]
    fn vpconflictq256(a: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.conflict.q.128"]
    fn vpconflictq128(a: i64x2) -> i64x2;

    // Generic LLVM count-leading-zeros, lowered to vplzcntd/vplzcntq.
    // NOTE(review): the bool parameter appears to be llvm.ctlz's `is_zero_poison`
    // flag — all callers in this file pass `false`; confirm against the LLVM LangRef.
    #[link_name = "llvm.ctlz.v16i32"]
    fn vplzcntd(a: i32x16, nonzero: bool) -> i32x16;
    #[link_name = "llvm.ctlz.v8i32"]
    fn vplzcntd256(a: i32x8, nonzero: bool) -> i32x8;
    #[link_name = "llvm.ctlz.v4i32"]
    fn vplzcntd128(a: i32x4, nonzero: bool) -> i32x4;

    // 64-bit element variants.
    #[link_name = "llvm.ctlz.v8i64"]
    fn vplzcntq(a: i64x8, nonzero: bool) -> i64x8;
    #[link_name = "llvm.ctlz.v4i64"]
    fn vplzcntq256(a: i64x4, nonzero: bool) -> i64x4;
    #[link_name = "llvm.ctlz.v2i64"]
    fn vplzcntq128(a: i64x2, nonzero: bool) -> i64x2;
}
495 
#[cfg(test)]
mod tests {

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    /// Expected VPCONFLICT result for lane `n` when every lane holds the same
    /// value: one bit per lower-indexed duplicate lane, i.e. the low `n` bits
    /// all set. `lo_mask32(0) == 0` (lane 0 has no lower lanes to conflict with).
    fn lo_mask32(n: i32) -> i32 {
        (1 << n) - 1
    }

    /// 64-bit counterpart of `lo_mask32`, for the `epi64` conflict tests.
    fn lo_mask64(n: i64) -> i64 {
        (1 << n) - 1
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_broadcastmw_epi32() {
        let k: __mmask16 = 2;
        let r = _mm512_broadcastmw_epi32(k);
        // The 16-bit mask is zero-extended and splatted into every 32-bit lane.
        assert_eq_m512i(r, _mm512_set1_epi32(2));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_broadcastmw_epi32() {
        let k: __mmask16 = 2;
        let r = _mm256_broadcastmw_epi32(k);
        assert_eq_m256i(r, _mm256_set1_epi32(2));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_broadcastmw_epi32() {
        let k: __mmask16 = 2;
        let r = _mm_broadcastmw_epi32(k);
        assert_eq_m128i(r, _mm_set1_epi32(2));
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_broadcastmb_epi64() {
        let k: __mmask8 = 2;
        let r = _mm512_broadcastmb_epi64(k);
        // The 8-bit mask is zero-extended and splatted into every 64-bit lane.
        assert_eq_m512i(r, _mm512_set1_epi64(2));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_broadcastmb_epi64() {
        let k: __mmask8 = 2;
        let r = _mm256_broadcastmb_epi64(k);
        assert_eq_m256i(r, _mm256_set1_epi64x(2));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_broadcastmb_epi64() {
        let k: __mmask8 = 2;
        let r = _mm_broadcastmb_epi64(k);
        assert_eq_m128i(r, _mm_set1_epi64x(2));
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_conflict_epi32() {
        // Every lane equal => lane n conflicts with all n lower lanes.
        let a = _mm512_set1_epi32(1);
        let r = _mm512_conflict_epi32(a);
        let e = _mm512_set_epi32(
            lo_mask32(15),
            lo_mask32(14),
            lo_mask32(13),
            lo_mask32(12),
            lo_mask32(11),
            lo_mask32(10),
            lo_mask32(9),
            lo_mask32(8),
            lo_mask32(7),
            lo_mask32(6),
            lo_mask32(5),
            lo_mask32(4),
            lo_mask32(3),
            lo_mask32(2),
            lo_mask32(1),
            lo_mask32(0),
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_mask_conflict_epi32() {
        let a = _mm512_set1_epi32(1);
        // All-zero mask: `src` (here `a`) passes through unchanged.
        assert_eq_m512i(_mm512_mask_conflict_epi32(a, 0, a), a);
        let r = _mm512_mask_conflict_epi32(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(
            lo_mask32(15),
            lo_mask32(14),
            lo_mask32(13),
            lo_mask32(12),
            lo_mask32(11),
            lo_mask32(10),
            lo_mask32(9),
            lo_mask32(8),
            lo_mask32(7),
            lo_mask32(6),
            lo_mask32(5),
            lo_mask32(4),
            lo_mask32(3),
            lo_mask32(2),
            lo_mask32(1),
            lo_mask32(0),
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_maskz_conflict_epi32() {
        let a = _mm512_set1_epi32(1);
        // All-zero mask: every lane is zeroed.
        assert_eq_m512i(_mm512_maskz_conflict_epi32(0, a), _mm512_setzero_si512());
        let r = _mm512_maskz_conflict_epi32(0b11111111_11111111, a);
        let e = _mm512_set_epi32(
            lo_mask32(15),
            lo_mask32(14),
            lo_mask32(13),
            lo_mask32(12),
            lo_mask32(11),
            lo_mask32(10),
            lo_mask32(9),
            lo_mask32(8),
            lo_mask32(7),
            lo_mask32(6),
            lo_mask32(5),
            lo_mask32(4),
            lo_mask32(3),
            lo_mask32(2),
            lo_mask32(1),
            lo_mask32(0),
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_conflict_epi32() {
        let a = _mm256_set1_epi32(1);
        let r = _mm256_conflict_epi32(a);
        let e = _mm256_set_epi32(
            lo_mask32(7),
            lo_mask32(6),
            lo_mask32(5),
            lo_mask32(4),
            lo_mask32(3),
            lo_mask32(2),
            lo_mask32(1),
            lo_mask32(0),
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_mask_conflict_epi32() {
        let a = _mm256_set1_epi32(1);
        assert_eq_m256i(_mm256_mask_conflict_epi32(a, 0, a), a);
        let r = _mm256_mask_conflict_epi32(a, 0b11111111, a);
        let e = _mm256_set_epi32(
            lo_mask32(7),
            lo_mask32(6),
            lo_mask32(5),
            lo_mask32(4),
            lo_mask32(3),
            lo_mask32(2),
            lo_mask32(1),
            lo_mask32(0),
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_maskz_conflict_epi32() {
        let a = _mm256_set1_epi32(1);
        assert_eq_m256i(_mm256_maskz_conflict_epi32(0, a), _mm256_setzero_si256());
        let r = _mm256_maskz_conflict_epi32(0b11111111, a);
        let e = _mm256_set_epi32(
            lo_mask32(7),
            lo_mask32(6),
            lo_mask32(5),
            lo_mask32(4),
            lo_mask32(3),
            lo_mask32(2),
            lo_mask32(1),
            lo_mask32(0),
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_conflict_epi32() {
        let a = _mm_set1_epi32(1);
        let r = _mm_conflict_epi32(a);
        let e = _mm_set_epi32(lo_mask32(3), lo_mask32(2), lo_mask32(1), lo_mask32(0));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_mask_conflict_epi32() {
        let a = _mm_set1_epi32(1);
        assert_eq_m128i(_mm_mask_conflict_epi32(a, 0, a), a);
        let r = _mm_mask_conflict_epi32(a, 0b00001111, a);
        let e = _mm_set_epi32(lo_mask32(3), lo_mask32(2), lo_mask32(1), lo_mask32(0));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_maskz_conflict_epi32() {
        let a = _mm_set1_epi32(1);
        assert_eq_m128i(_mm_maskz_conflict_epi32(0, a), _mm_setzero_si128());
        let r = _mm_maskz_conflict_epi32(0b00001111, a);
        let e = _mm_set_epi32(lo_mask32(3), lo_mask32(2), lo_mask32(1), lo_mask32(0));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_conflict_epi64() {
        let a = _mm512_set1_epi64(1);
        let r = _mm512_conflict_epi64(a);
        let e = _mm512_set_epi64(
            lo_mask64(7),
            lo_mask64(6),
            lo_mask64(5),
            lo_mask64(4),
            lo_mask64(3),
            lo_mask64(2),
            lo_mask64(1),
            lo_mask64(0),
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_mask_conflict_epi64() {
        let a = _mm512_set1_epi64(1);
        assert_eq_m512i(_mm512_mask_conflict_epi64(a, 0, a), a);
        let r = _mm512_mask_conflict_epi64(a, 0b11111111, a);
        let e = _mm512_set_epi64(
            lo_mask64(7),
            lo_mask64(6),
            lo_mask64(5),
            lo_mask64(4),
            lo_mask64(3),
            lo_mask64(2),
            lo_mask64(1),
            lo_mask64(0),
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_maskz_conflict_epi64() {
        let a = _mm512_set1_epi64(1);
        assert_eq_m512i(_mm512_maskz_conflict_epi64(0, a), _mm512_setzero_si512());
        let r = _mm512_maskz_conflict_epi64(0b11111111, a);
        let e = _mm512_set_epi64(
            lo_mask64(7),
            lo_mask64(6),
            lo_mask64(5),
            lo_mask64(4),
            lo_mask64(3),
            lo_mask64(2),
            lo_mask64(1),
            lo_mask64(0),
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_conflict_epi64() {
        let a = _mm256_set1_epi64x(1);
        let r = _mm256_conflict_epi64(a);
        let e = _mm256_set_epi64x(lo_mask64(3), lo_mask64(2), lo_mask64(1), lo_mask64(0));
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_mask_conflict_epi64() {
        let a = _mm256_set1_epi64x(1);
        assert_eq_m256i(_mm256_mask_conflict_epi64(a, 0, a), a);
        let r = _mm256_mask_conflict_epi64(a, 0b00001111, a);
        let e = _mm256_set_epi64x(lo_mask64(3), lo_mask64(2), lo_mask64(1), lo_mask64(0));
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_maskz_conflict_epi64() {
        let a = _mm256_set1_epi64x(1);
        assert_eq_m256i(_mm256_maskz_conflict_epi64(0, a), _mm256_setzero_si256());
        let r = _mm256_maskz_conflict_epi64(0b00001111, a);
        let e = _mm256_set_epi64x(lo_mask64(3), lo_mask64(2), lo_mask64(1), lo_mask64(0));
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_conflict_epi64() {
        let a = _mm_set1_epi64x(1);
        let r = _mm_conflict_epi64(a);
        let e = _mm_set_epi64x(lo_mask64(1), lo_mask64(0));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_mask_conflict_epi64() {
        let a = _mm_set1_epi64x(1);
        assert_eq_m128i(_mm_mask_conflict_epi64(a, 0, a), a);
        let r = _mm_mask_conflict_epi64(a, 0b00000011, a);
        let e = _mm_set_epi64x(lo_mask64(1), lo_mask64(0));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_maskz_conflict_epi64() {
        let a = _mm_set1_epi64x(1);
        assert_eq_m128i(_mm_maskz_conflict_epi64(0, a), _mm_setzero_si128());
        let r = _mm_maskz_conflict_epi64(0b00000011, a);
        let e = _mm_set_epi64x(lo_mask64(1), lo_mask64(0));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_lzcnt_epi32() {
        // lzcnt(1) == 31 for a 32-bit lane.
        let a = _mm512_set1_epi32(1);
        let r = _mm512_lzcnt_epi32(a);
        assert_eq_m512i(r, _mm512_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_mask_lzcnt_epi32() {
        let a = _mm512_set1_epi32(1);
        assert_eq_m512i(_mm512_mask_lzcnt_epi32(a, 0, a), a);
        let r = _mm512_mask_lzcnt_epi32(a, 0b11111111_11111111, a);
        assert_eq_m512i(r, _mm512_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_maskz_lzcnt_epi32() {
        // lzcnt(2) == 30 for a 32-bit lane.
        let a = _mm512_set1_epi32(2);
        assert_eq_m512i(_mm512_maskz_lzcnt_epi32(0, a), _mm512_setzero_si512());
        let r = _mm512_maskz_lzcnt_epi32(0b11111111_11111111, a);
        assert_eq_m512i(r, _mm512_set1_epi32(30));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_lzcnt_epi32() {
        let a = _mm256_set1_epi32(1);
        let r = _mm256_lzcnt_epi32(a);
        assert_eq_m256i(r, _mm256_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_mask_lzcnt_epi32() {
        let a = _mm256_set1_epi32(1);
        assert_eq_m256i(_mm256_mask_lzcnt_epi32(a, 0, a), a);
        let r = _mm256_mask_lzcnt_epi32(a, 0b11111111, a);
        assert_eq_m256i(r, _mm256_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_maskz_lzcnt_epi32() {
        let a = _mm256_set1_epi32(1);
        assert_eq_m256i(_mm256_maskz_lzcnt_epi32(0, a), _mm256_setzero_si256());
        let r = _mm256_maskz_lzcnt_epi32(0b11111111, a);
        assert_eq_m256i(r, _mm256_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_lzcnt_epi32() {
        let a = _mm_set1_epi32(1);
        let r = _mm_lzcnt_epi32(a);
        assert_eq_m128i(r, _mm_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_mask_lzcnt_epi32() {
        let a = _mm_set1_epi32(1);
        assert_eq_m128i(_mm_mask_lzcnt_epi32(a, 0, a), a);
        let r = _mm_mask_lzcnt_epi32(a, 0b00001111, a);
        assert_eq_m128i(r, _mm_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_maskz_lzcnt_epi32() {
        let a = _mm_set1_epi32(1);
        assert_eq_m128i(_mm_maskz_lzcnt_epi32(0, a), _mm_setzero_si128());
        let r = _mm_maskz_lzcnt_epi32(0b00001111, a);
        assert_eq_m128i(r, _mm_set1_epi32(31));
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_lzcnt_epi64() {
        // lzcnt(1) == 63 for a 64-bit lane.
        let a = _mm512_set1_epi64(1);
        let r = _mm512_lzcnt_epi64(a);
        assert_eq_m512i(r, _mm512_set1_epi64(63));
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_mask_lzcnt_epi64() {
        let a = _mm512_set1_epi64(1);
        assert_eq_m512i(_mm512_mask_lzcnt_epi64(a, 0, a), a);
        let r = _mm512_mask_lzcnt_epi64(a, 0b11111111, a);
        assert_eq_m512i(r, _mm512_set1_epi64(63));
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_maskz_lzcnt_epi64() {
        // lzcnt(2) == 62 for a 64-bit lane.
        let a = _mm512_set1_epi64(2);
        assert_eq_m512i(_mm512_maskz_lzcnt_epi64(0, a), _mm512_setzero_si512());
        let r = _mm512_maskz_lzcnt_epi64(0b11111111, a);
        assert_eq_m512i(r, _mm512_set1_epi64(62));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_lzcnt_epi64() {
        let a = _mm256_set1_epi64x(1);
        let r = _mm256_lzcnt_epi64(a);
        assert_eq_m256i(r, _mm256_set1_epi64x(63));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_mask_lzcnt_epi64() {
        let a = _mm256_set1_epi64x(1);
        assert_eq_m256i(_mm256_mask_lzcnt_epi64(a, 0, a), a);
        let r = _mm256_mask_lzcnt_epi64(a, 0b00001111, a);
        assert_eq_m256i(r, _mm256_set1_epi64x(63));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_maskz_lzcnt_epi64() {
        let a = _mm256_set1_epi64x(1);
        assert_eq_m256i(_mm256_maskz_lzcnt_epi64(0, a), _mm256_setzero_si256());
        let r = _mm256_maskz_lzcnt_epi64(0b00001111, a);
        assert_eq_m256i(r, _mm256_set1_epi64x(63));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_lzcnt_epi64() {
        let a = _mm_set1_epi64x(1);
        let r = _mm_lzcnt_epi64(a);
        assert_eq_m128i(r, _mm_set1_epi64x(63));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_mask_lzcnt_epi64() {
        let a = _mm_set1_epi64x(1);
        assert_eq_m128i(_mm_mask_lzcnt_epi64(a, 0, a), a);
        let r = _mm_mask_lzcnt_epi64(a, 0b00001111, a);
        assert_eq_m128i(r, _mm_set1_epi64x(63));
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_maskz_lzcnt_epi64() {
        let a = _mm_set1_epi64x(1);
        assert_eq_m128i(_mm_maskz_lzcnt_epi64(0, a), _mm_setzero_si128());
        let r = _mm_maskz_lzcnt_epi64(0b00001111, a);
        assert_eq_m128i(r, _mm_set1_epi64x(63));
    }
}
1171