1 #include <benchmark/benchmark.h>
2 #include <bitset>
3 #include <cstdint>
4 #include <stdio.h>
5 
6 #include "../common.h"
7 #include "03_popcnt_ispc.h"
8 
9 static Docs docs("Check popcnt implmentation of stdlib functions:\n"
10                  "[int32, int64] x [uniform, varying] x [all, even] versions.\n"
11                  "Observations:\n"
12                  " - popcnt is very lightweight, so 8 popcnt are chained.\n"
13                  " - chaining doesn't cause optimizing out popcnt, 8 seems to be good enough"
14                  " - varying versions have overhead on insert/extract\n"
15                  " - for even versions mask is statically known and compiler is able to optimize out inactive lanes\n"
16                  "Expectation:\n"
17                  " - No regressions\n");
18 
19 // Minimum size is maximum target width, i.e. 64.
20 // Larger buffer is better, but preferably to stay within L1.
21 #define ARGS Arg(8192)
22 //#define ARGS RangeMultiplier(2)->Range(64, 64<<15)->Complexity(benchmark::oN)
23 
init(T * src,T * dst,int count)24 template <typename T> static void init(T *src, T *dst, int count) {
25     for (int i = 0; i < count; i++) {
26         src[i] = static_cast<T>(i);
27         dst[i] = 0;
28     }
29 }
30 
check_all(T * src,T * dst,int count)31 template <typename T> static void check_all(T *src, T *dst, int count) {
32     for (int i = 0; i < count; i++) {
33         // for single popcnt() here's the formula, but we chain 8 popcnt()
34         // int count = std::bitset< std::numeric_limits<T>::digits >(src[i]).count();
35         int count = (i == 0) ? 0 : 1;
36         if (dst[i] != count) {
37             printf("Error i=%d\n", i);
38             return;
39         }
40     }
41 }
42 
check_even(T * src,T * dst,int count)43 template <typename T> static void check_even(T *src, T *dst, int count) {
44     for (int i = 0; i < count; i += 2) {
45         // for single popcnt() here's the formula, but we chain 8 popcnt()
46         // int count = std::bitset< std::numeric_limits<T>::digits >(src[i]).count();
47         int count = (i == 0) ? 0 : 1;
48         if (dst[i] != count) {
49             printf("Error i=%d\n", i);
50             return;
51         }
52     }
53 }
54 
55 #define POPCNT(T_C, T_ISPC, V, ALL)                                                                                    \
56     static void popcnt_stdlib_##V##_##T_ISPC##_##ALL(benchmark::State &state) {                                        \
57         int count = static_cast<int>(state.range(0));                                                                  \
58         T_C *src = static_cast<T_C *>(aligned_alloc_helper(sizeof(T_C) * count));                                      \
59         T_C *dst = static_cast<T_C *>(aligned_alloc_helper(sizeof(T_C) * count));                                      \
60         init(src, dst, count);                                                                                         \
61                                                                                                                        \
62         for (auto _ : state) {                                                                                         \
63             ispc::popcnt_##V##_##T_ISPC##_##ALL(src, dst, count);                                                      \
64         }                                                                                                              \
65                                                                                                                        \
66         check_##ALL(src, dst, count);                                                                                  \
67         aligned_free_helper(src);                                                                                      \
68         aligned_free_helper(dst);                                                                                      \
69         state.SetComplexityN(state.range(0));                                                                          \
70     }                                                                                                                  \
71     BENCHMARK(popcnt_stdlib_##V##_##T_ISPC##_##ALL)->ARGS;
72 
73 POPCNT(int, int32, uniform, all)
74 POPCNT(int, int32, varying, all)
75 POPCNT(int, int32, uniform, even)
76 POPCNT(int, int32, varying, even)
77 POPCNT(int64_t, int64, uniform, all)
78 POPCNT(int64_t, int64, varying, all)
79 POPCNT(int64_t, int64, uniform, even)
80 POPCNT(int64_t, int64, varying, even)
81 
82 BENCHMARK_MAIN();
83