1 use crate::{
2 core_arch::{simd::*, simd_llvm::*, x86::*},
3 mem::transmute,
4 };
5
6 #[cfg(test)]
7 use stdarch_test::assert_instr;
8
/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmw_epi32&expand=553)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
    // `k` is an unsigned 16-bit mask, so `as i32` zero-extends it before
    // the splat — matching the zero-extended broadcast of vpbroadcastmw2d.
    _mm512_set1_epi32(k as i32)
}

/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmw_epi32&expand=552)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
    // Zero-extend the mask word and splat it into all eight 32-bit lanes.
    _mm256_set1_epi32(k as i32)
}

/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmw_epi32&expand=551)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
    // Zero-extend the mask word and splat it into all four 32-bit lanes.
    _mm_set1_epi32(k as i32)
}
38
/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmb_epi64&expand=550)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
    // `k` is an unsigned 8-bit mask, so `as i64` zero-extends it before
    // the splat — matching the zero-extended broadcast of vpbroadcastmb2q.
    _mm512_set1_epi64(k as i64)
}

/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmb_epi64&expand=549)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
    // Zero-extend the mask byte and splat it into all four 64-bit lanes.
    _mm256_set1_epi64x(k as i64)
}

/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmb_epi64&expand=548)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
    // Zero-extend the mask byte and splat it into both 64-bit lanes.
    _mm_set1_epi64x(k as i64)
}
68
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi32&expand=1248)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
    // Direct lowering to the LLVM intrinsic for VPCONFLICTD (512-bit).
    transmute(vpconflictd(a.as_i32x16()))
}

/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi32&expand=1249)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let conflict = _mm512_conflict_epi32(a).as_i32x16();
    // Per lane: mask bit set -> conflict result, clear -> src.
    transmute(simd_select_bitmask(k, conflict, src.as_i32x16()))
}

/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi32&expand=1250)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let conflict = _mm512_conflict_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    // Per lane: mask bit set -> conflict result, clear -> zero.
    transmute(simd_select_bitmask(k, conflict, zero))
}
101
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi32&expand=1245)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
    // Direct lowering to the LLVM intrinsic for VPCONFLICTD (256-bit, VL).
    transmute(vpconflictd256(a.as_i32x8()))
}

/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi32&expand=1246)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let conflict = _mm256_conflict_epi32(a).as_i32x8();
    // Per lane: mask bit set -> conflict result, clear -> src.
    transmute(simd_select_bitmask(k, conflict, src.as_i32x8()))
}

/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi32&expand=1247)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let conflict = _mm256_conflict_epi32(a).as_i32x8();
    let zero = _mm256_setzero_si256().as_i32x8();
    // Per lane: mask bit set -> conflict result, clear -> zero.
    transmute(simd_select_bitmask(k, conflict, zero))
}
134
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi32&expand=1242)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
    // Direct lowering to the LLVM intrinsic for VPCONFLICTD (128-bit, VL).
    transmute(vpconflictd128(a.as_i32x4()))
}

/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi32&expand=1243)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let conflict = _mm_conflict_epi32(a).as_i32x4();
    // Per lane: mask bit set -> conflict result, clear -> src.
    transmute(simd_select_bitmask(k, conflict, src.as_i32x4()))
}

/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi32&expand=1244)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let conflict = _mm_conflict_epi32(a).as_i32x4();
    let zero = _mm_setzero_si128().as_i32x4();
    // Per lane: mask bit set -> conflict result, clear -> zero.
    transmute(simd_select_bitmask(k, conflict, zero))
}
167
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi64&expand=1257)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
    // Direct lowering to the LLVM intrinsic for VPCONFLICTQ (512-bit).
    transmute(vpconflictq(a.as_i64x8()))
}

/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi64&expand=1258)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let conflict = _mm512_conflict_epi64(a).as_i64x8();
    // Per lane: mask bit set -> conflict result, clear -> src.
    transmute(simd_select_bitmask(k, conflict, src.as_i64x8()))
}

/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi64&expand=1259)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let conflict = _mm512_conflict_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    // Per lane: mask bit set -> conflict result, clear -> zero.
    transmute(simd_select_bitmask(k, conflict, zero))
}
200
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi64&expand=1254)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
    // Direct lowering to the LLVM intrinsic for VPCONFLICTQ (256-bit, VL).
    transmute(vpconflictq256(a.as_i64x4()))
}

/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi64&expand=1255)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let conflict = _mm256_conflict_epi64(a).as_i64x4();
    // Per lane: mask bit set -> conflict result, clear -> src.
    transmute(simd_select_bitmask(k, conflict, src.as_i64x4()))
}

/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi64&expand=1256)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let conflict = _mm256_conflict_epi64(a).as_i64x4();
    let zero = _mm256_setzero_si256().as_i64x4();
    // Per lane: mask bit set -> conflict result, clear -> zero.
    transmute(simd_select_bitmask(k, conflict, zero))
}
233
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi64&expand=1251)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
    // Direct lowering to the LLVM intrinsic for VPCONFLICTQ (128-bit, VL).
    transmute(vpconflictq128(a.as_i64x2()))
}

/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi64&expand=1252)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let conflict = _mm_conflict_epi64(a).as_i64x2();
    // Per lane: mask bit set -> conflict result, clear -> src.
    transmute(simd_select_bitmask(k, conflict, src.as_i64x2()))
}

/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi64&expand=1253)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let conflict = _mm_conflict_epi64(a).as_i64x2();
    let zero = _mm_setzero_si128().as_i64x2();
    // Per lane: mask bit set -> conflict result, clear -> zero.
    transmute(simd_select_bitmask(k, conflict, zero))
}
266
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi32&expand=3491)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
    // `false` = llvm.ctlz's is_zero_undef flag is off: a zero lane yields
    // a defined count (the element bit width), matching VPLZCNTD.
    transmute(vplzcntd(a.as_i32x16(), false))
}

/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi32&expand=3492)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
    // Per lane: mask bit set -> count, clear -> src.
    transmute(simd_select_bitmask(k, zerocount, src.as_i32x16()))
}

/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi32&expand=3493)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    // Per lane: mask bit set -> count, clear -> zero.
    transmute(simd_select_bitmask(k, zerocount, zero))
}
299
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi32&expand=3488)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
    // `false`: zero lanes yield a defined count (see llvm.ctlz is_zero_undef).
    transmute(vplzcntd256(a.as_i32x8(), false))
}

/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi32&expand=3489)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
    // Per lane: mask bit set -> count, clear -> src.
    transmute(simd_select_bitmask(k, zerocount, src.as_i32x8()))
}

/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi32&expand=3490)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
    let zero = _mm256_setzero_si256().as_i32x8();
    // Per lane: mask bit set -> count, clear -> zero.
    transmute(simd_select_bitmask(k, zerocount, zero))
}
332
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi32&expand=3485)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
    // `false`: zero lanes yield a defined count (see llvm.ctlz is_zero_undef).
    transmute(vplzcntd128(a.as_i32x4(), false))
}

/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi32&expand=3486)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
    // Per lane: mask bit set -> count, clear -> src.
    transmute(simd_select_bitmask(k, zerocount, src.as_i32x4()))
}

/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi32&expand=3487)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
    let zero = _mm_setzero_si128().as_i32x4();
    // Per lane: mask bit set -> count, clear -> zero.
    transmute(simd_select_bitmask(k, zerocount, zero))
}
365
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi64&expand=3500)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
    // `false`: zero lanes yield a defined count (see llvm.ctlz is_zero_undef).
    transmute(vplzcntq(a.as_i64x8(), false))
}

/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi64&expand=3501)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
    // Per lane: mask bit set -> count, clear -> src.
    transmute(simd_select_bitmask(k, zerocount, src.as_i64x8()))
}

/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi64&expand=3502)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    // Per lane: mask bit set -> count, clear -> zero.
    transmute(simd_select_bitmask(k, zerocount, zero))
}
398
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi64&expand=3497)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
    // `false`: zero lanes yield a defined count (see llvm.ctlz is_zero_undef).
    transmute(vplzcntq256(a.as_i64x4(), false))
}

/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi64&expand=3498)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
    // Per lane: mask bit set -> count, clear -> src.
    transmute(simd_select_bitmask(k, zerocount, src.as_i64x4()))
}

/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi64&expand=3499)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
    let zero = _mm256_setzero_si256().as_i64x4();
    // Per lane: mask bit set -> count, clear -> zero.
    transmute(simd_select_bitmask(k, zerocount, zero))
}
431
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi64&expand=3494)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
    // `false`: zero lanes yield a defined count (see llvm.ctlz is_zero_undef).
    transmute(vplzcntq128(a.as_i64x2(), false))
}

/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi64&expand=3495)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
    // Per lane: mask bit set -> count, clear -> src.
    transmute(simd_select_bitmask(k, zerocount, src.as_i64x2()))
}

/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi64&expand=3496)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
    let zero = _mm_setzero_si128().as_i64x2();
    // Per lane: mask bit set -> count, clear -> zero.
    transmute(simd_select_bitmask(k, zerocount, zero))
}
464
// Raw LLVM intrinsic declarations backing the safe-ish wrappers above.
#[allow(improper_ctypes)]
extern "C" {
    // VPCONFLICT lowerings: per-element conflict detection (AVX512CD).
    #[link_name = "llvm.x86.avx512.conflict.d.512"]
    fn vpconflictd(a: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.conflict.d.256"]
    fn vpconflictd256(a: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.conflict.d.128"]
    fn vpconflictd128(a: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.conflict.q.512"]
    fn vpconflictq(a: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.conflict.q.256"]
    fn vpconflictq256(a: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.conflict.q.128"]
    fn vpconflictq128(a: i64x2) -> i64x2;

    // Generic llvm.ctlz, which LLVM selects to VPLZCNT under AVX512CD.
    // The second parameter is llvm.ctlz's `is_zero_undef` flag; all
    // callers in this file pass `false` so a zero element produces the
    // element bit width rather than an undefined value.
    #[link_name = "llvm.ctlz.v16i32"]
    fn vplzcntd(a: i32x16, nonzero: bool) -> i32x16;
    #[link_name = "llvm.ctlz.v8i32"]
    fn vplzcntd256(a: i32x8, nonzero: bool) -> i32x8;
    #[link_name = "llvm.ctlz.v4i32"]
    fn vplzcntd128(a: i32x4, nonzero: bool) -> i32x4;

    #[link_name = "llvm.ctlz.v8i64"]
    fn vplzcntq(a: i64x8, nonzero: bool) -> i64x8;
    #[link_name = "llvm.ctlz.v4i64"]
    fn vplzcntq256(a: i64x4, nonzero: bool) -> i64x4;
    #[link_name = "llvm.ctlz.v2i64"]
    fn vplzcntq128(a: i64x2, nonzero: bool) -> i64x2;
}
495
496 #[cfg(test)]
497 mod tests {
498
499 use crate::core_arch::x86::*;
500 use stdarch_test::simd_test;
501
    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_broadcastmw_epi32() {
        // Mask value 2 must appear, zero-extended, in every 32-bit lane.
        let a: __mmask16 = 2;
        let r = _mm512_broadcastmw_epi32(a);
        let e = _mm512_set1_epi32(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_broadcastmw_epi32() {
        let a: __mmask16 = 2;
        let r = _mm256_broadcastmw_epi32(a);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_broadcastmw_epi32() {
        let a: __mmask16 = 2;
        let r = _mm_broadcastmw_epi32(a);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512cd")]
    unsafe fn test_mm512_broadcastmb_epi64() {
        // Mask value 2 must appear, zero-extended, in every 64-bit lane.
        let a: __mmask8 = 2;
        let r = _mm512_broadcastmb_epi64(a);
        let e = _mm512_set1_epi64(2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm256_broadcastmb_epi64() {
        let a: __mmask8 = 2;
        let r = _mm256_broadcastmb_epi64(a);
        let e = _mm256_set1_epi64x(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512cd,avx512vl")]
    unsafe fn test_mm_broadcastmb_epi64() {
        let a: __mmask8 = 2;
        let r = _mm_broadcastmb_epi64(a);
        let e = _mm_set1_epi64x(2);
        assert_eq_m128i(r, e);
    }
549
550 #[simd_test(enable = "avx512cd")]
test_mm512_conflict_epi32()551 unsafe fn test_mm512_conflict_epi32() {
552 let a = _mm512_set1_epi32(1);
553 let r = _mm512_conflict_epi32(a);
554 let e = _mm512_set_epi32(
555 1 << 14
556 | 1 << 13
557 | 1 << 12
558 | 1 << 11
559 | 1 << 10
560 | 1 << 9
561 | 1 << 8
562 | 1 << 7
563 | 1 << 6
564 | 1 << 5
565 | 1 << 4
566 | 1 << 3
567 | 1 << 2
568 | 1 << 1
569 | 1 << 0,
570 1 << 13
571 | 1 << 12
572 | 1 << 11
573 | 1 << 10
574 | 1 << 9
575 | 1 << 8
576 | 1 << 7
577 | 1 << 6
578 | 1 << 5
579 | 1 << 4
580 | 1 << 3
581 | 1 << 2
582 | 1 << 1
583 | 1 << 0,
584 1 << 12
585 | 1 << 11
586 | 1 << 10
587 | 1 << 9
588 | 1 << 8
589 | 1 << 7
590 | 1 << 6
591 | 1 << 5
592 | 1 << 4
593 | 1 << 3
594 | 1 << 2
595 | 1 << 1
596 | 1 << 0,
597 1 << 11
598 | 1 << 10
599 | 1 << 9
600 | 1 << 8
601 | 1 << 7
602 | 1 << 6
603 | 1 << 5
604 | 1 << 4
605 | 1 << 3
606 | 1 << 2
607 | 1 << 1
608 | 1 << 0,
609 1 << 10
610 | 1 << 9
611 | 1 << 8
612 | 1 << 7
613 | 1 << 6
614 | 1 << 5
615 | 1 << 4
616 | 1 << 3
617 | 1 << 2
618 | 1 << 1
619 | 1 << 0,
620 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
621 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
622 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
623 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
624 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
625 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
626 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
627 1 << 2 | 1 << 1 | 1 << 0,
628 1 << 1 | 1 << 0,
629 1 << 0,
630 0,
631 );
632 assert_eq_m512i(r, e);
633 }
634
635 #[simd_test(enable = "avx512cd")]
test_mm512_mask_conflict_epi32()636 unsafe fn test_mm512_mask_conflict_epi32() {
637 let a = _mm512_set1_epi32(1);
638 let r = _mm512_mask_conflict_epi32(a, 0, a);
639 assert_eq_m512i(r, a);
640 let r = _mm512_mask_conflict_epi32(a, 0b11111111_11111111, a);
641 let e = _mm512_set_epi32(
642 1 << 14
643 | 1 << 13
644 | 1 << 12
645 | 1 << 11
646 | 1 << 10
647 | 1 << 9
648 | 1 << 8
649 | 1 << 7
650 | 1 << 6
651 | 1 << 5
652 | 1 << 4
653 | 1 << 3
654 | 1 << 2
655 | 1 << 1
656 | 1 << 0,
657 1 << 13
658 | 1 << 12
659 | 1 << 11
660 | 1 << 10
661 | 1 << 9
662 | 1 << 8
663 | 1 << 7
664 | 1 << 6
665 | 1 << 5
666 | 1 << 4
667 | 1 << 3
668 | 1 << 2
669 | 1 << 1
670 | 1 << 0,
671 1 << 12
672 | 1 << 11
673 | 1 << 10
674 | 1 << 9
675 | 1 << 8
676 | 1 << 7
677 | 1 << 6
678 | 1 << 5
679 | 1 << 4
680 | 1 << 3
681 | 1 << 2
682 | 1 << 1
683 | 1 << 0,
684 1 << 11
685 | 1 << 10
686 | 1 << 9
687 | 1 << 8
688 | 1 << 7
689 | 1 << 6
690 | 1 << 5
691 | 1 << 4
692 | 1 << 3
693 | 1 << 2
694 | 1 << 1
695 | 1 << 0,
696 1 << 10
697 | 1 << 9
698 | 1 << 8
699 | 1 << 7
700 | 1 << 6
701 | 1 << 5
702 | 1 << 4
703 | 1 << 3
704 | 1 << 2
705 | 1 << 1
706 | 1 << 0,
707 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
708 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
709 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
710 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
711 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
712 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
713 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
714 1 << 2 | 1 << 1 | 1 << 0,
715 1 << 1 | 1 << 0,
716 1 << 0,
717 0,
718 );
719 assert_eq_m512i(r, e);
720 }
721
722 #[simd_test(enable = "avx512cd")]
test_mm512_maskz_conflict_epi32()723 unsafe fn test_mm512_maskz_conflict_epi32() {
724 let a = _mm512_set1_epi32(1);
725 let r = _mm512_maskz_conflict_epi32(0, a);
726 assert_eq_m512i(r, _mm512_setzero_si512());
727 let r = _mm512_maskz_conflict_epi32(0b11111111_11111111, a);
728 let e = _mm512_set_epi32(
729 1 << 14
730 | 1 << 13
731 | 1 << 12
732 | 1 << 11
733 | 1 << 10
734 | 1 << 9
735 | 1 << 8
736 | 1 << 7
737 | 1 << 6
738 | 1 << 5
739 | 1 << 4
740 | 1 << 3
741 | 1 << 2
742 | 1 << 1
743 | 1 << 0,
744 1 << 13
745 | 1 << 12
746 | 1 << 11
747 | 1 << 10
748 | 1 << 9
749 | 1 << 8
750 | 1 << 7
751 | 1 << 6
752 | 1 << 5
753 | 1 << 4
754 | 1 << 3
755 | 1 << 2
756 | 1 << 1
757 | 1 << 0,
758 1 << 12
759 | 1 << 11
760 | 1 << 10
761 | 1 << 9
762 | 1 << 8
763 | 1 << 7
764 | 1 << 6
765 | 1 << 5
766 | 1 << 4
767 | 1 << 3
768 | 1 << 2
769 | 1 << 1
770 | 1 << 0,
771 1 << 11
772 | 1 << 10
773 | 1 << 9
774 | 1 << 8
775 | 1 << 7
776 | 1 << 6
777 | 1 << 5
778 | 1 << 4
779 | 1 << 3
780 | 1 << 2
781 | 1 << 1
782 | 1 << 0,
783 1 << 10
784 | 1 << 9
785 | 1 << 8
786 | 1 << 7
787 | 1 << 6
788 | 1 << 5
789 | 1 << 4
790 | 1 << 3
791 | 1 << 2
792 | 1 << 1
793 | 1 << 0,
794 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
795 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
796 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
797 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
798 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
799 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
800 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
801 1 << 2 | 1 << 1 | 1 << 0,
802 1 << 1 | 1 << 0,
803 1 << 0,
804 0,
805 );
806 assert_eq_m512i(r, e);
807 }
808
809 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_conflict_epi32()810 unsafe fn test_mm256_conflict_epi32() {
811 let a = _mm256_set1_epi32(1);
812 let r = _mm256_conflict_epi32(a);
813 let e = _mm256_set_epi32(
814 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
815 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
816 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
817 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
818 1 << 2 | 1 << 1 | 1 << 0,
819 1 << 1 | 1 << 0,
820 1 << 0,
821 0,
822 );
823 assert_eq_m256i(r, e);
824 }
825
826 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_mask_conflict_epi32()827 unsafe fn test_mm256_mask_conflict_epi32() {
828 let a = _mm256_set1_epi32(1);
829 let r = _mm256_mask_conflict_epi32(a, 0, a);
830 assert_eq_m256i(r, a);
831 let r = _mm256_mask_conflict_epi32(a, 0b11111111, a);
832 let e = _mm256_set_epi32(
833 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
834 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
835 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
836 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
837 1 << 2 | 1 << 1 | 1 << 0,
838 1 << 1 | 1 << 0,
839 1 << 0,
840 0,
841 );
842 assert_eq_m256i(r, e);
843 }
844
845 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_maskz_conflict_epi32()846 unsafe fn test_mm256_maskz_conflict_epi32() {
847 let a = _mm256_set1_epi32(1);
848 let r = _mm256_maskz_conflict_epi32(0, a);
849 assert_eq_m256i(r, _mm256_setzero_si256());
850 let r = _mm256_maskz_conflict_epi32(0b11111111, a);
851 let e = _mm256_set_epi32(
852 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
853 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
854 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
855 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
856 1 << 2 | 1 << 1 | 1 << 0,
857 1 << 1 | 1 << 0,
858 1 << 0,
859 0,
860 );
861 assert_eq_m256i(r, e);
862 }
863
864 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_conflict_epi32()865 unsafe fn test_mm_conflict_epi32() {
866 let a = _mm_set1_epi32(1);
867 let r = _mm_conflict_epi32(a);
868 let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
869 assert_eq_m128i(r, e);
870 }
871
872 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_mask_conflict_epi32()873 unsafe fn test_mm_mask_conflict_epi32() {
874 let a = _mm_set1_epi32(1);
875 let r = _mm_mask_conflict_epi32(a, 0, a);
876 assert_eq_m128i(r, a);
877 let r = _mm_mask_conflict_epi32(a, 0b00001111, a);
878 let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
879 assert_eq_m128i(r, e);
880 }
881
882 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_maskz_conflict_epi32()883 unsafe fn test_mm_maskz_conflict_epi32() {
884 let a = _mm_set1_epi32(1);
885 let r = _mm_maskz_conflict_epi32(0, a);
886 assert_eq_m128i(r, _mm_setzero_si128());
887 let r = _mm_maskz_conflict_epi32(0b00001111, a);
888 let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
889 assert_eq_m128i(r, e);
890 }
891
892 #[simd_test(enable = "avx512cd")]
test_mm512_conflict_epi64()893 unsafe fn test_mm512_conflict_epi64() {
894 let a = _mm512_set1_epi64(1);
895 let r = _mm512_conflict_epi64(a);
896 let e = _mm512_set_epi64(
897 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
898 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
899 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
900 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
901 1 << 2 | 1 << 1 | 1 << 0,
902 1 << 1 | 1 << 0,
903 1 << 0,
904 0,
905 );
906 assert_eq_m512i(r, e);
907 }
908
909 #[simd_test(enable = "avx512cd")]
test_mm512_mask_conflict_epi64()910 unsafe fn test_mm512_mask_conflict_epi64() {
911 let a = _mm512_set1_epi64(1);
912 let r = _mm512_mask_conflict_epi64(a, 0, a);
913 assert_eq_m512i(r, a);
914 let r = _mm512_mask_conflict_epi64(a, 0b11111111, a);
915 let e = _mm512_set_epi64(
916 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
917 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
918 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
919 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
920 1 << 2 | 1 << 1 | 1 << 0,
921 1 << 1 | 1 << 0,
922 1 << 0,
923 0,
924 );
925 assert_eq_m512i(r, e);
926 }
927
928 #[simd_test(enable = "avx512cd")]
test_mm512_maskz_conflict_epi64()929 unsafe fn test_mm512_maskz_conflict_epi64() {
930 let a = _mm512_set1_epi64(1);
931 let r = _mm512_maskz_conflict_epi64(0, a);
932 assert_eq_m512i(r, _mm512_setzero_si512());
933 let r = _mm512_maskz_conflict_epi64(0b11111111, a);
934 let e = _mm512_set_epi64(
935 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
936 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
937 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
938 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
939 1 << 2 | 1 << 1 | 1 << 0,
940 1 << 1 | 1 << 0,
941 1 << 0,
942 0,
943 );
944 assert_eq_m512i(r, e);
945 }
946
947 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_conflict_epi64()948 unsafe fn test_mm256_conflict_epi64() {
949 let a = _mm256_set1_epi64x(1);
950 let r = _mm256_conflict_epi64(a);
951 let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
952 assert_eq_m256i(r, e);
953 }
954
955 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_mask_conflict_epi64()956 unsafe fn test_mm256_mask_conflict_epi64() {
957 let a = _mm256_set1_epi64x(1);
958 let r = _mm256_mask_conflict_epi64(a, 0, a);
959 assert_eq_m256i(r, a);
960 let r = _mm256_mask_conflict_epi64(a, 0b00001111, a);
961 let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
962 assert_eq_m256i(r, e);
963 }
964
965 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_maskz_conflict_epi64()966 unsafe fn test_mm256_maskz_conflict_epi64() {
967 let a = _mm256_set1_epi64x(1);
968 let r = _mm256_maskz_conflict_epi64(0, a);
969 assert_eq_m256i(r, _mm256_setzero_si256());
970 let r = _mm256_maskz_conflict_epi64(0b00001111, a);
971 let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
972 assert_eq_m256i(r, e);
973 }
974
975 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_conflict_epi64()976 unsafe fn test_mm_conflict_epi64() {
977 let a = _mm_set1_epi64x(1);
978 let r = _mm_conflict_epi64(a);
979 let e = _mm_set_epi64x(1 << 0, 0);
980 assert_eq_m128i(r, e);
981 }
982
983 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_mask_conflict_epi64()984 unsafe fn test_mm_mask_conflict_epi64() {
985 let a = _mm_set1_epi64x(1);
986 let r = _mm_mask_conflict_epi64(a, 0, a);
987 assert_eq_m128i(r, a);
988 let r = _mm_mask_conflict_epi64(a, 0b00000011, a);
989 let e = _mm_set_epi64x(1 << 0, 0);
990 assert_eq_m128i(r, e);
991 }
992
993 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_maskz_conflict_epi64()994 unsafe fn test_mm_maskz_conflict_epi64() {
995 let a = _mm_set1_epi64x(1);
996 let r = _mm_maskz_conflict_epi64(0, a);
997 assert_eq_m128i(r, _mm_setzero_si128());
998 let r = _mm_maskz_conflict_epi64(0b00000011, a);
999 let e = _mm_set_epi64x(1 << 0, 0);
1000 assert_eq_m128i(r, e);
1001 }
1002
1003 #[simd_test(enable = "avx512cd")]
test_mm512_lzcnt_epi32()1004 unsafe fn test_mm512_lzcnt_epi32() {
1005 let a = _mm512_set1_epi32(1);
1006 let r = _mm512_lzcnt_epi32(a);
1007 let e = _mm512_set1_epi32(31);
1008 assert_eq_m512i(r, e);
1009 }
1010
1011 #[simd_test(enable = "avx512cd")]
test_mm512_mask_lzcnt_epi32()1012 unsafe fn test_mm512_mask_lzcnt_epi32() {
1013 let a = _mm512_set1_epi32(1);
1014 let r = _mm512_mask_lzcnt_epi32(a, 0, a);
1015 assert_eq_m512i(r, a);
1016 let r = _mm512_mask_lzcnt_epi32(a, 0b11111111_11111111, a);
1017 let e = _mm512_set1_epi32(31);
1018 assert_eq_m512i(r, e);
1019 }
1020
1021 #[simd_test(enable = "avx512cd")]
test_mm512_maskz_lzcnt_epi32()1022 unsafe fn test_mm512_maskz_lzcnt_epi32() {
1023 let a = _mm512_set1_epi32(2);
1024 let r = _mm512_maskz_lzcnt_epi32(0, a);
1025 assert_eq_m512i(r, _mm512_setzero_si512());
1026 let r = _mm512_maskz_lzcnt_epi32(0b11111111_11111111, a);
1027 let e = _mm512_set1_epi32(30);
1028 assert_eq_m512i(r, e);
1029 }
1030
1031 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_lzcnt_epi32()1032 unsafe fn test_mm256_lzcnt_epi32() {
1033 let a = _mm256_set1_epi32(1);
1034 let r = _mm256_lzcnt_epi32(a);
1035 let e = _mm256_set1_epi32(31);
1036 assert_eq_m256i(r, e);
1037 }
1038
1039 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_mask_lzcnt_epi32()1040 unsafe fn test_mm256_mask_lzcnt_epi32() {
1041 let a = _mm256_set1_epi32(1);
1042 let r = _mm256_mask_lzcnt_epi32(a, 0, a);
1043 assert_eq_m256i(r, a);
1044 let r = _mm256_mask_lzcnt_epi32(a, 0b11111111, a);
1045 let e = _mm256_set1_epi32(31);
1046 assert_eq_m256i(r, e);
1047 }
1048
1049 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_maskz_lzcnt_epi32()1050 unsafe fn test_mm256_maskz_lzcnt_epi32() {
1051 let a = _mm256_set1_epi32(1);
1052 let r = _mm256_maskz_lzcnt_epi32(0, a);
1053 assert_eq_m256i(r, _mm256_setzero_si256());
1054 let r = _mm256_maskz_lzcnt_epi32(0b11111111, a);
1055 let e = _mm256_set1_epi32(31);
1056 assert_eq_m256i(r, e);
1057 }
1058
1059 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_lzcnt_epi32()1060 unsafe fn test_mm_lzcnt_epi32() {
1061 let a = _mm_set1_epi32(1);
1062 let r = _mm_lzcnt_epi32(a);
1063 let e = _mm_set1_epi32(31);
1064 assert_eq_m128i(r, e);
1065 }
1066
1067 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_mask_lzcnt_epi32()1068 unsafe fn test_mm_mask_lzcnt_epi32() {
1069 let a = _mm_set1_epi32(1);
1070 let r = _mm_mask_lzcnt_epi32(a, 0, a);
1071 assert_eq_m128i(r, a);
1072 let r = _mm_mask_lzcnt_epi32(a, 0b00001111, a);
1073 let e = _mm_set1_epi32(31);
1074 assert_eq_m128i(r, e);
1075 }
1076
1077 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_maskz_lzcnt_epi32()1078 unsafe fn test_mm_maskz_lzcnt_epi32() {
1079 let a = _mm_set1_epi32(1);
1080 let r = _mm_maskz_lzcnt_epi32(0, a);
1081 assert_eq_m128i(r, _mm_setzero_si128());
1082 let r = _mm_maskz_lzcnt_epi32(0b00001111, a);
1083 let e = _mm_set1_epi32(31);
1084 assert_eq_m128i(r, e);
1085 }
1086
1087 #[simd_test(enable = "avx512cd")]
test_mm512_lzcnt_epi64()1088 unsafe fn test_mm512_lzcnt_epi64() {
1089 let a = _mm512_set1_epi64(1);
1090 let r = _mm512_lzcnt_epi64(a);
1091 let e = _mm512_set1_epi64(63);
1092 assert_eq_m512i(r, e);
1093 }
1094
1095 #[simd_test(enable = "avx512cd")]
test_mm512_mask_lzcnt_epi64()1096 unsafe fn test_mm512_mask_lzcnt_epi64() {
1097 let a = _mm512_set1_epi64(1);
1098 let r = _mm512_mask_lzcnt_epi64(a, 0, a);
1099 assert_eq_m512i(r, a);
1100 let r = _mm512_mask_lzcnt_epi64(a, 0b11111111, a);
1101 let e = _mm512_set1_epi64(63);
1102 assert_eq_m512i(r, e);
1103 }
1104
1105 #[simd_test(enable = "avx512cd")]
test_mm512_maskz_lzcnt_epi64()1106 unsafe fn test_mm512_maskz_lzcnt_epi64() {
1107 let a = _mm512_set1_epi64(2);
1108 let r = _mm512_maskz_lzcnt_epi64(0, a);
1109 assert_eq_m512i(r, _mm512_setzero_si512());
1110 let r = _mm512_maskz_lzcnt_epi64(0b11111111, a);
1111 let e = _mm512_set1_epi64(62);
1112 assert_eq_m512i(r, e);
1113 }
1114
1115 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_lzcnt_epi64()1116 unsafe fn test_mm256_lzcnt_epi64() {
1117 let a = _mm256_set1_epi64x(1);
1118 let r = _mm256_lzcnt_epi64(a);
1119 let e = _mm256_set1_epi64x(63);
1120 assert_eq_m256i(r, e);
1121 }
1122
1123 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_mask_lzcnt_epi64()1124 unsafe fn test_mm256_mask_lzcnt_epi64() {
1125 let a = _mm256_set1_epi64x(1);
1126 let r = _mm256_mask_lzcnt_epi64(a, 0, a);
1127 assert_eq_m256i(r, a);
1128 let r = _mm256_mask_lzcnt_epi64(a, 0b00001111, a);
1129 let e = _mm256_set1_epi64x(63);
1130 assert_eq_m256i(r, e);
1131 }
1132
1133 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm256_maskz_lzcnt_epi64()1134 unsafe fn test_mm256_maskz_lzcnt_epi64() {
1135 let a = _mm256_set1_epi64x(1);
1136 let r = _mm256_maskz_lzcnt_epi64(0, a);
1137 assert_eq_m256i(r, _mm256_setzero_si256());
1138 let r = _mm256_maskz_lzcnt_epi64(0b00001111, a);
1139 let e = _mm256_set1_epi64x(63);
1140 assert_eq_m256i(r, e);
1141 }
1142
1143 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_lzcnt_epi64()1144 unsafe fn test_mm_lzcnt_epi64() {
1145 let a = _mm_set1_epi64x(1);
1146 let r = _mm_lzcnt_epi64(a);
1147 let e = _mm_set1_epi64x(63);
1148 assert_eq_m128i(r, e);
1149 }
1150
1151 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_mask_lzcnt_epi64()1152 unsafe fn test_mm_mask_lzcnt_epi64() {
1153 let a = _mm_set1_epi64x(1);
1154 let r = _mm_mask_lzcnt_epi64(a, 0, a);
1155 assert_eq_m128i(r, a);
1156 let r = _mm_mask_lzcnt_epi64(a, 0b00001111, a);
1157 let e = _mm_set1_epi64x(63);
1158 assert_eq_m128i(r, e);
1159 }
1160
1161 #[simd_test(enable = "avx512cd,avx512vl")]
test_mm_maskz_lzcnt_epi64()1162 unsafe fn test_mm_maskz_lzcnt_epi64() {
1163 let a = _mm_set1_epi64x(1);
1164 let r = _mm_maskz_lzcnt_epi64(0, a);
1165 assert_eq_m128i(r, _mm_setzero_si128());
1166 let r = _mm_maskz_lzcnt_epi64(0b00001111, a);
1167 let e = _mm_set1_epi64x(63);
1168 assert_eq_m128i(r, e);
1169 }
1170 }
1171