1 //! Vectorized Population Count Instructions for Double- and Quadwords (VPOPCNTDQ)
2 //!
3 //! The intrinsics here correspond to those in the `immintrin.h` C header.
4 //!
5 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7 //!
8 //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10 use crate::core_arch::simd::i32x16;
11 use crate::core_arch::simd::i32x4;
12 use crate::core_arch::simd::i32x8;
13 use crate::core_arch::simd::i64x2;
14 use crate::core_arch::simd::i64x4;
15 use crate::core_arch::simd::i64x8;
16 use crate::core_arch::simd_llvm::simd_select_bitmask;
17 use crate::core_arch::x86::__m128i;
18 use crate::core_arch::x86::__m256i;
19 use crate::core_arch::x86::__m512i;
20 use crate::core_arch::x86::__mmask16;
21 use crate::core_arch::x86::__mmask8;
22 use crate::core_arch::x86::_mm256_setzero_si256;
23 use crate::core_arch::x86::_mm512_setzero_si512;
24 use crate::core_arch::x86::_mm_setzero_si128;
25 use crate::core_arch::x86::m128iExt;
26 use crate::core_arch::x86::m256iExt;
27 use crate::core_arch::x86::m512iExt;
28 use crate::mem::transmute;
29
30 #[cfg(test)]
31 use stdarch_test::assert_instr;
32
// Bindings to LLVM's vector `ctpop` (population count) intrinsics, one per
// lane layout used by the public wrappers in this file.
// NOTE(review): the `improper_ctypes` allowance is presumably required because
// the SIMD vector types are not considered FFI-safe by the lint - confirm.
#[allow(improper_ctypes)]
extern "C" {
    // 32-bit lanes: 16-, 8- and 4-element vectors.
    #[link_name = "llvm.ctpop.v16i32"]
    fn popcnt_v16i32(x: i32x16) -> i32x16;
    #[link_name = "llvm.ctpop.v8i32"]
    fn popcnt_v8i32(x: i32x8) -> i32x8;
    #[link_name = "llvm.ctpop.v4i32"]
    fn popcnt_v4i32(x: i32x4) -> i32x4;

    // 64-bit lanes: 8-, 4- and 2-element vectors.
    #[link_name = "llvm.ctpop.v8i64"]
    fn popcnt_v8i64(x: i64x8) -> i64x8;
    #[link_name = "llvm.ctpop.v4i64"]
    fn popcnt_v4i64(x: i64x4) -> i64x4;
    #[link_name = "llvm.ctpop.v2i64"]
    fn popcnt_v2i64(x: i64x2) -> i64x2;
}
49
50 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
51 ///
52 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi32)
53 #[inline]
54 #[target_feature(enable = "avx512vpopcntdq")]
55 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm512_popcnt_epi32(a: __m512i) -> __m512i56 pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
57 transmute(popcnt_v16i32(a.as_i32x16()))
58 }
59
60 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
61 ///
62 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
63 /// Otherwise the computation result is written into the result.
64 ///
65 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi32)
66 #[inline]
67 #[target_feature(enable = "avx512vpopcntdq")]
68 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i69 pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
70 let zero = _mm512_setzero_si512().as_i32x16();
71 transmute(simd_select_bitmask(k, popcnt_v16i32(a.as_i32x16()), zero))
72 }
73
74 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
75 ///
76 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
77 /// Otherwise the computation result is written into the result.
78 ///
79 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi32)
80 #[inline]
81 #[target_feature(enable = "avx512vpopcntdq")]
82 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i83 pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
84 transmute(simd_select_bitmask(
85 k,
86 popcnt_v16i32(a.as_i32x16()),
87 src.as_i32x16(),
88 ))
89 }
90
91 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
92 ///
93 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi32)
94 #[inline]
95 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
96 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm256_popcnt_epi32(a: __m256i) -> __m256i97 pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
98 transmute(popcnt_v8i32(a.as_i32x8()))
99 }
100
101 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
102 ///
103 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
104 /// Otherwise the computation result is written into the result.
105 ///
106 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi32)
107 #[inline]
108 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
109 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i110 pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
111 let zero = _mm256_setzero_si256().as_i32x8();
112 transmute(simd_select_bitmask(k, popcnt_v8i32(a.as_i32x8()), zero))
113 }
114
115 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
116 ///
117 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
118 /// Otherwise the computation result is written into the result.
119 ///
120 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi32)
121 #[inline]
122 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
123 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i124 pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
125 transmute(simd_select_bitmask(
126 k,
127 popcnt_v8i32(a.as_i32x8()),
128 src.as_i32x8(),
129 ))
130 }
131
132 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
133 ///
134 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi32)
135 #[inline]
136 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
137 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm_popcnt_epi32(a: __m128i) -> __m128i138 pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
139 transmute(popcnt_v4i32(a.as_i32x4()))
140 }
141
142 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
143 ///
144 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
145 /// Otherwise the computation result is written into the result.
146 ///
147 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi32)
148 #[inline]
149 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
150 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i151 pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
152 let zero = _mm_setzero_si128().as_i32x4();
153 transmute(simd_select_bitmask(k, popcnt_v4i32(a.as_i32x4()), zero))
154 }
155
156 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
157 ///
158 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
159 /// Otherwise the computation result is written into the result.
160 ///
161 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi32)
162 #[inline]
163 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
164 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i165 pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
166 transmute(simd_select_bitmask(
167 k,
168 popcnt_v4i32(a.as_i32x4()),
169 src.as_i32x4(),
170 ))
171 }
172
173 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
174 ///
175 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi64)
176 #[inline]
177 #[target_feature(enable = "avx512vpopcntdq")]
178 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm512_popcnt_epi64(a: __m512i) -> __m512i179 pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
180 transmute(popcnt_v8i64(a.as_i64x8()))
181 }
182
183 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
184 ///
185 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
186 /// Otherwise the computation result is written into the result.
187 ///
188 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi64)
189 #[inline]
190 #[target_feature(enable = "avx512vpopcntdq")]
191 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i192 pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
193 let zero = _mm512_setzero_si512().as_i64x8();
194 transmute(simd_select_bitmask(k, popcnt_v8i64(a.as_i64x8()), zero))
195 }
196
197 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
198 ///
199 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
200 /// Otherwise the computation result is written into the result.
201 ///
202 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi64)
203 #[inline]
204 #[target_feature(enable = "avx512vpopcntdq")]
205 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i206 pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
207 transmute(simd_select_bitmask(
208 k,
209 popcnt_v8i64(a.as_i64x8()),
210 src.as_i64x8(),
211 ))
212 }
213
214 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
215 ///
216 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi64)
217 #[inline]
218 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
219 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm256_popcnt_epi64(a: __m256i) -> __m256i220 pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
221 transmute(popcnt_v4i64(a.as_i64x4()))
222 }
223
224 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
225 ///
226 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
227 /// Otherwise the computation result is written into the result.
228 ///
229 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi64)
230 #[inline]
231 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
232 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i233 pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
234 let zero = _mm256_setzero_si256().as_i64x4();
235 transmute(simd_select_bitmask(k, popcnt_v4i64(a.as_i64x4()), zero))
236 }
237
238 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
239 ///
240 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
241 /// Otherwise the computation result is written into the result.
242 ///
243 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi64)
244 #[inline]
245 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
246 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i247 pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
248 transmute(simd_select_bitmask(
249 k,
250 popcnt_v4i64(a.as_i64x4()),
251 src.as_i64x4(),
252 ))
253 }
254
255 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
256 ///
257 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi64)
258 #[inline]
259 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
260 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm_popcnt_epi64(a: __m128i) -> __m128i261 pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
262 transmute(popcnt_v2i64(a.as_i64x2()))
263 }
264
265 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
266 ///
267 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
268 /// Otherwise the computation result is written into the result.
269 ///
270 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi64)
271 #[inline]
272 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
273 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i274 pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
275 let zero = _mm_setzero_si128().as_i64x2();
276 transmute(simd_select_bitmask(k, popcnt_v2i64(a.as_i64x2()), zero))
277 }
278
279 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
280 ///
281 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
282 /// Otherwise the computation result is written into the result.
283 ///
284 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi64)
285 #[inline]
286 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
287 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i288 pub unsafe fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
289 transmute(simd_select_bitmask(
290 k,
291 popcnt_v2i64(a.as_i64x2()),
292 src.as_i64x2(),
293 ))
294 }
295
#[cfg(test)]
mod tests {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    // Each test builds an input vector, runs one popcount intrinsic on it and
    // compares the outcome with hand-computed per-lane bit counts. The masked
    // variants pass the input as `src` too, so unselected lanes must come back
    // unchanged; the zero-masked variants must clear them instead.

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi32() {
        let expected = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1);
        let input = _mm512_set_epi32(
            0,
            1,
            -1,
            2,
            7,
            0xFF_FE,
            0x7F_FF_FF_FF,
            -100,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        assert_eq_m512i(_mm512_popcnt_epi32(input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi32() {
        // Upper eight lanes are counted, lower eight are passed through.
        let k = 0b1111_1111_0000_0000;
        let expected = _mm512_set_epi32(
            0,
            1,
            32,
            1,
            3,
            15,
            31,
            28,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        let input = _mm512_set_epi32(
            0,
            1,
            -1,
            2,
            7,
            0xFF_FE,
            0x7F_FF_FF_FF,
            -100,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        assert_eq_m512i(_mm512_mask_popcnt_epi32(input, k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi32() {
        // Upper eight lanes are counted, lower eight are zeroed.
        let k = 0b1111_1111_0000_0000;
        let expected = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 0, 0, 0, 0, 0, 0, 0, 0);
        let input = _mm512_set_epi32(
            0,
            1,
            -1,
            2,
            7,
            0xFF_FE,
            0x7F_FF_FF_FF,
            -100,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        assert_eq_m512i(_mm512_maskz_popcnt_epi32(k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi32() {
        let expected = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28);
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        assert_eq_m256i(_mm256_popcnt_epi32(input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi32() {
        // Upper four lanes are counted, lower four are passed through.
        let k = 0b1111_0000;
        let expected = _mm256_set_epi32(0, 1, 32, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        assert_eq_m256i(_mm256_mask_popcnt_epi32(input, k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi32() {
        // Upper four lanes are counted, lower four are zeroed.
        let k = 0b1111_0000;
        let expected = _mm256_set_epi32(0, 1, 32, 1, 0, 0, 0, 0);
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        assert_eq_m256i(_mm256_maskz_popcnt_epi32(k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi32() {
        let expected = _mm_set_epi32(0, 1, 32, 28);
        let input = _mm_set_epi32(0, 1, -1, -100);
        assert_eq_m128i(_mm_popcnt_epi32(input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi32() {
        // Every lane except lane 0 is counted; lane 0 is passed through.
        let k = 0b1110;
        let expected = _mm_set_epi32(0, 1, 32, -100);
        let input = _mm_set_epi32(0, 1, -1, -100);
        assert_eq_m128i(_mm_mask_popcnt_epi32(input, k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi32() {
        // Every lane except lane 0 is counted; lane 0 is zeroed.
        let k = 0b1110;
        let expected = _mm_set_epi32(0, 1, 32, 0);
        let input = _mm_set_epi32(0, 1, -1, -100);
        assert_eq_m128i(_mm_maskz_popcnt_epi32(k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi64() {
        let expected = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60);
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        assert_eq_m512i(_mm512_popcnt_epi64(input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi64() {
        // Upper four lanes are counted, lower four are passed through.
        let k = 0b1111_0000;
        let expected = _mm512_set_epi64(0, 1, 64, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        assert_eq_m512i(_mm512_mask_popcnt_epi64(input, k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi64() {
        // Upper four lanes are counted, lower four are zeroed.
        let k = 0b1111_0000;
        let expected = _mm512_set_epi64(0, 1, 64, 1, 0, 0, 0, 0);
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        assert_eq_m512i(_mm512_maskz_popcnt_epi64(k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_popcnt_epi64() {
        let expected = _mm256_set_epi64x(0, 1, 64, 60);
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        assert_eq_m256i(_mm256_popcnt_epi64(input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi64() {
        // Every lane except lane 0 is counted; lane 0 is passed through.
        let k = 0b1110;
        let expected = _mm256_set_epi64x(0, 1, 64, -100);
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        assert_eq_m256i(_mm256_mask_popcnt_epi64(input, k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi64() {
        // Every lane except lane 0 is counted; lane 0 is zeroed.
        let k = 0b1110;
        let expected = _mm256_set_epi64x(0, 1, 64, 0);
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        assert_eq_m256i(_mm256_maskz_popcnt_epi64(k, input), expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_popcnt_epi64() {
        // Small values first, then values with most bits set.
        let small = _mm_set_epi64x(0, 1);
        assert_eq_m128i(_mm_popcnt_epi64(small), _mm_set_epi64x(0, 1));

        let dense = _mm_set_epi64x(-1, -100);
        assert_eq_m128i(_mm_popcnt_epi64(dense), _mm_set_epi64x(64, 60));
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi64() {
        // Only lane 1 is counted; lane 0 is passed through.
        let k = 0b10;

        let first = _mm_set_epi64x(0, -100);
        let first_expected = _mm_set_epi64x(0, -100);
        assert_eq_m128i(_mm_mask_popcnt_epi64(first, k, first), first_expected);

        let second = _mm_set_epi64x(-1, 1);
        let second_expected = _mm_set_epi64x(64, 1);
        assert_eq_m128i(_mm_mask_popcnt_epi64(second, k, second), second_expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi64() {
        // Only lane 1 is counted; lane 0 is zeroed.
        let k = 0b10;

        let first = _mm_set_epi64x(0, 1);
        let first_expected = _mm_set_epi64x(0, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi64(k, first), first_expected);

        let second = _mm_set_epi64x(-1, -100);
        let second_expected = _mm_set_epi64x(64, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi64(k, second), second_expected);
    }
}
542