1 //! Vectorized Population Count Instructions for Double- and Quadwords (VPOPCNTDQ)
2 //!
3 //! The intrinsics here correspond to those in the `immintrin.h` C header.
4 //!
5 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7 //!
8 //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9 
10 use crate::core_arch::simd::i32x16;
11 use crate::core_arch::simd::i32x4;
12 use crate::core_arch::simd::i32x8;
13 use crate::core_arch::simd::i64x2;
14 use crate::core_arch::simd::i64x4;
15 use crate::core_arch::simd::i64x8;
16 use crate::core_arch::simd_llvm::simd_select_bitmask;
17 use crate::core_arch::x86::__m128i;
18 use crate::core_arch::x86::__m256i;
19 use crate::core_arch::x86::__m512i;
20 use crate::core_arch::x86::__mmask16;
21 use crate::core_arch::x86::__mmask8;
22 use crate::core_arch::x86::_mm256_setzero_si256;
23 use crate::core_arch::x86::_mm512_setzero_si512;
24 use crate::core_arch::x86::_mm_setzero_si128;
25 use crate::core_arch::x86::m128iExt;
26 use crate::core_arch::x86::m256iExt;
27 use crate::core_arch::x86::m512iExt;
28 use crate::mem::transmute;
29 
30 #[cfg(test)]
31 use stdarch_test::assert_instr;
32 
33 #[allow(improper_ctypes)]
34 extern "C" {
35     #[link_name = "llvm.ctpop.v16i32"]
popcnt_v16i32(x: i32x16) -> i32x1636     fn popcnt_v16i32(x: i32x16) -> i32x16;
37     #[link_name = "llvm.ctpop.v8i32"]
popcnt_v8i32(x: i32x8) -> i32x838     fn popcnt_v8i32(x: i32x8) -> i32x8;
39     #[link_name = "llvm.ctpop.v4i32"]
popcnt_v4i32(x: i32x4) -> i32x440     fn popcnt_v4i32(x: i32x4) -> i32x4;
41 
42     #[link_name = "llvm.ctpop.v8i64"]
popcnt_v8i64(x: i64x8) -> i64x843     fn popcnt_v8i64(x: i64x8) -> i64x8;
44     #[link_name = "llvm.ctpop.v4i64"]
popcnt_v4i64(x: i64x4) -> i64x445     fn popcnt_v4i64(x: i64x4) -> i64x4;
46     #[link_name = "llvm.ctpop.v2i64"]
popcnt_v2i64(x: i64x2) -> i64x247     fn popcnt_v2i64(x: i64x2) -> i64x2;
48 }
49 
50 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
51 ///
52 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi32)
53 #[inline]
54 #[target_feature(enable = "avx512vpopcntdq")]
55 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm512_popcnt_epi32(a: __m512i) -> __m512i56 pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
57     transmute(popcnt_v16i32(a.as_i32x16()))
58 }
59 
60 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
61 ///
62 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
63 /// Otherwise the computation result is written into the result.
64 ///
65 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi32)
66 #[inline]
67 #[target_feature(enable = "avx512vpopcntdq")]
68 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i69 pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
70     let zero = _mm512_setzero_si512().as_i32x16();
71     transmute(simd_select_bitmask(k, popcnt_v16i32(a.as_i32x16()), zero))
72 }
73 
74 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
75 ///
76 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
77 /// Otherwise the computation result is written into the result.
78 ///
79 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi32)
80 #[inline]
81 #[target_feature(enable = "avx512vpopcntdq")]
82 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i83 pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
84     transmute(simd_select_bitmask(
85         k,
86         popcnt_v16i32(a.as_i32x16()),
87         src.as_i32x16(),
88     ))
89 }
90 
91 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
92 ///
93 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi32)
94 #[inline]
95 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
96 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm256_popcnt_epi32(a: __m256i) -> __m256i97 pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
98     transmute(popcnt_v8i32(a.as_i32x8()))
99 }
100 
101 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
102 ///
103 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
104 /// Otherwise the computation result is written into the result.
105 ///
106 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi32)
107 #[inline]
108 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
109 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i110 pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
111     let zero = _mm256_setzero_si256().as_i32x8();
112     transmute(simd_select_bitmask(k, popcnt_v8i32(a.as_i32x8()), zero))
113 }
114 
115 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
116 ///
117 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
118 /// Otherwise the computation result is written into the result.
119 ///
120 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi32)
121 #[inline]
122 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
123 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i124 pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
125     transmute(simd_select_bitmask(
126         k,
127         popcnt_v8i32(a.as_i32x8()),
128         src.as_i32x8(),
129     ))
130 }
131 
132 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
133 ///
134 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi32)
135 #[inline]
136 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
137 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm_popcnt_epi32(a: __m128i) -> __m128i138 pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
139     transmute(popcnt_v4i32(a.as_i32x4()))
140 }
141 
142 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
143 ///
144 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
145 /// Otherwise the computation result is written into the result.
146 ///
147 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi32)
148 #[inline]
149 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
150 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i151 pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
152     let zero = _mm_setzero_si128().as_i32x4();
153     transmute(simd_select_bitmask(k, popcnt_v4i32(a.as_i32x4()), zero))
154 }
155 
156 /// For each packed 32-bit integer maps the value to the number of logical 1 bits.
157 ///
158 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
159 /// Otherwise the computation result is written into the result.
160 ///
161 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi32)
162 #[inline]
163 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
164 #[cfg_attr(test, assert_instr(vpopcntd))]
_mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i165 pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
166     transmute(simd_select_bitmask(
167         k,
168         popcnt_v4i32(a.as_i32x4()),
169         src.as_i32x4(),
170     ))
171 }
172 
173 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
174 ///
175 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi64)
176 #[inline]
177 #[target_feature(enable = "avx512vpopcntdq")]
178 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm512_popcnt_epi64(a: __m512i) -> __m512i179 pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
180     transmute(popcnt_v8i64(a.as_i64x8()))
181 }
182 
183 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
184 ///
185 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
186 /// Otherwise the computation result is written into the result.
187 ///
188 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi64)
189 #[inline]
190 #[target_feature(enable = "avx512vpopcntdq")]
191 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i192 pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
193     let zero = _mm512_setzero_si512().as_i64x8();
194     transmute(simd_select_bitmask(k, popcnt_v8i64(a.as_i64x8()), zero))
195 }
196 
197 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
198 ///
199 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
200 /// Otherwise the computation result is written into the result.
201 ///
202 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi64)
203 #[inline]
204 #[target_feature(enable = "avx512vpopcntdq")]
205 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i206 pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
207     transmute(simd_select_bitmask(
208         k,
209         popcnt_v8i64(a.as_i64x8()),
210         src.as_i64x8(),
211     ))
212 }
213 
214 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
215 ///
216 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi64)
217 #[inline]
218 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
219 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm256_popcnt_epi64(a: __m256i) -> __m256i220 pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
221     transmute(popcnt_v4i64(a.as_i64x4()))
222 }
223 
224 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
225 ///
226 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
227 /// Otherwise the computation result is written into the result.
228 ///
229 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi64)
230 #[inline]
231 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
232 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i233 pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
234     let zero = _mm256_setzero_si256().as_i64x4();
235     transmute(simd_select_bitmask(k, popcnt_v4i64(a.as_i64x4()), zero))
236 }
237 
238 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
239 ///
240 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
241 /// Otherwise the computation result is written into the result.
242 ///
243 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi64)
244 #[inline]
245 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
246 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i247 pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
248     transmute(simd_select_bitmask(
249         k,
250         popcnt_v4i64(a.as_i64x4()),
251         src.as_i64x4(),
252     ))
253 }
254 
255 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
256 ///
257 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi64)
258 #[inline]
259 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
260 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm_popcnt_epi64(a: __m128i) -> __m128i261 pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
262     transmute(popcnt_v2i64(a.as_i64x2()))
263 }
264 
265 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
266 ///
267 /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
268 /// Otherwise the computation result is written into the result.
269 ///
270 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi64)
271 #[inline]
272 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
273 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i274 pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
275     let zero = _mm_setzero_si128().as_i64x2();
276     transmute(simd_select_bitmask(k, popcnt_v2i64(a.as_i64x2()), zero))
277 }
278 
279 /// For each packed 64-bit integer maps the value to the number of logical 1 bits.
280 ///
281 /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
282 /// Otherwise the computation result is written into the result.
283 ///
284 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi64)
285 #[inline]
286 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
287 #[cfg_attr(test, assert_instr(vpopcntq))]
_mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i288 pub unsafe fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
289     transmute(simd_select_bitmask(
290         k,
291         popcnt_v2i64(a.as_i64x2()),
292         src.as_i64x2(),
293     ))
294 }
295 
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // In every masked test below, the expected vectors show that the high mask
    // bits line up with the first-listed `set` arguments: selected lanes hold
    // bit counts, unselected lanes hold either the `src` value (`mask`
    // variants) or zero (`maskz` variants).

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi32() {
        #[rustfmt::skip]
        let input = _mm512_set_epi32(
            0, 1, -1, 2,
            7, 0xFF_FE, 0x7F_FF_FF_FF, -100,
            0x40_00_00_00, 103, 371, 552,
            432_948, 818_826_998, 255, 256,
        );
        #[rustfmt::skip]
        let expected = _mm512_set_epi32(
            0, 1, 32, 1,
            3, 15, 31, 28,
            1, 5, 6, 3,
            10, 17, 8, 1,
        );
        let got = _mm512_popcnt_epi32(input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi32() {
        #[rustfmt::skip]
        let input = _mm512_set_epi32(
            0, 1, -1, 2,
            7, 0xFF_FE, 0x7F_FF_FF_FF, -100,
            0x40_00_00_00, 103, 371, 552,
            432_948, 818_826_998, 255, 256,
        );
        let k: __mmask16 = 0xFF_00;
        // The first eight listed lanes are counted; the rest pass through from `src`.
        #[rustfmt::skip]
        let expected = _mm512_set_epi32(
            0, 1, 32, 1,
            3, 15, 31, 28,
            0x40_00_00_00, 103, 371, 552,
            432_948, 818_826_998, 255, 256,
        );
        let got = _mm512_mask_popcnt_epi32(input, k, input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi32() {
        #[rustfmt::skip]
        let input = _mm512_set_epi32(
            0, 1, -1, 2,
            7, 0xFF_FE, 0x7F_FF_FF_FF, -100,
            0x40_00_00_00, 103, 371, 552,
            432_948, 818_826_998, 255, 256,
        );
        let k: __mmask16 = 0xFF_00;
        // The first eight listed lanes are counted; the rest are zeroed.
        #[rustfmt::skip]
        let expected = _mm512_set_epi32(
            0, 1, 32, 1,
            3, 15, 31, 28,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let got = _mm512_maskz_popcnt_epi32(k, input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let expected = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28);
        let got = _mm256_popcnt_epi32(input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let k: __mmask8 = 0xF0;
        let expected = _mm256_set_epi32(0, 1, 32, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let got = _mm256_mask_popcnt_epi32(input, k, input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let k: __mmask8 = 0xF0;
        let expected = _mm256_set_epi32(0, 1, 32, 1, 0, 0, 0, 0);
        let got = _mm256_maskz_popcnt_epi32(k, input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let expected = _mm_set_epi32(0, 1, 32, 28);
        let got = _mm_popcnt_epi32(input);
        assert_eq_m128i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let k: __mmask8 = 0xE;
        let expected = _mm_set_epi32(0, 1, 32, -100);
        let got = _mm_mask_popcnt_epi32(input, k, input);
        assert_eq_m128i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let k: __mmask8 = 0xE;
        let expected = _mm_set_epi32(0, 1, 32, 0);
        let got = _mm_maskz_popcnt_epi32(k, input);
        assert_eq_m128i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let expected = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60);
        let got = _mm512_popcnt_epi64(input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let k: __mmask8 = 0xF0;
        let expected = _mm512_set_epi64(0, 1, 64, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let got = _mm512_mask_popcnt_epi64(input, k, input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let k: __mmask8 = 0xF0;
        let expected = _mm512_set_epi64(0, 1, 64, 1, 0, 0, 0, 0);
        let got = _mm512_maskz_popcnt_epi64(k, input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let expected = _mm256_set_epi64x(0, 1, 64, 60);
        let got = _mm256_popcnt_epi64(input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let k: __mmask8 = 0xE;
        let expected = _mm256_set_epi64x(0, 1, 64, -100);
        let got = _mm256_mask_popcnt_epi64(input, k, input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let k: __mmask8 = 0xE;
        let expected = _mm256_set_epi64x(0, 1, 64, 0);
        let got = _mm256_maskz_popcnt_epi64(k, input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_popcnt_epi64() {
        // Only two lanes fit in a 128-bit vector, so the sample values are
        // checked in two rounds.
        let small = _mm_set_epi64x(0, 1);
        let small_expected = _mm_set_epi64x(0, 1);
        assert_eq_m128i(_mm_popcnt_epi64(small), small_expected);

        let negative = _mm_set_epi64x(-1, -100);
        let negative_expected = _mm_set_epi64x(64, 60);
        assert_eq_m128i(_mm_popcnt_epi64(negative), negative_expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi64() {
        let k: __mmask8 = 0x2;

        let first = _mm_set_epi64x(0, -100);
        let first_expected = _mm_set_epi64x(0, -100);
        assert_eq_m128i(_mm_mask_popcnt_epi64(first, k, first), first_expected);

        let second = _mm_set_epi64x(-1, 1);
        let second_expected = _mm_set_epi64x(64, 1);
        assert_eq_m128i(_mm_mask_popcnt_epi64(second, k, second), second_expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi64() {
        let k: __mmask8 = 0x2;

        let first = _mm_set_epi64x(0, 1);
        let first_expected = _mm_set_epi64x(0, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi64(k, first), first_expected);

        let second = _mm_set_epi64x(-1, -100);
        let second_expected = _mm_set_epi64x(64, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi64(k, second), second_expected);
    }
}
542