1 // This file is part of the FXT library.
2 // Copyright (C) 2010, 2011, 2012 Joerg Arndt
3 // License: GNU General Public License version 3 or later,
4 // see the file COPYING.txt in the main directory.
5 
6 #include "bits/bitcount.h"
7 #include "fxttypes.h"
8 
9 //#include "bits/print-bin.h"
10 //#include "fxtio.h"
11 
bit_count_v15(const ulong * x)12 static inline ulong bit_count_v15(const ulong *x)
13 // Return sum(j=0, 14, bit_count(x[j]) )
14 // Technique is "vertical" addition.
15 {
16 //#define PP(k) { cout << "x[" << setw(2) << k << "]="; print_bin("", x[k], 8); print_bin("  a0=", a0, 8); print_bin("  a1=", a1, 8); print_bin("  a2=", a2, 8); print_bin("  a3=", a3, 8); cout << endl; }
17 #define PP(k)
18 
19 #define VV(A) { ulong t = A & cy;  A ^= cy;  cy = t; }
20     ulong a1, a2, a3;
21 //    a1= a2 = a3 = 0;  // initialization only needed with printing
22     ulong a0=x[0]; PP(0);
23     { ulong cy = x[ 1];  VV(a0);  a1 = cy; }  PP(1);
24     { ulong cy = x[ 2];  VV(a0);  a1 ^= cy; }  PP(2);
25     { ulong cy = x[ 3];  VV(a0);  VV(a1);  a2 = cy; }  PP(3);
26     { ulong cy = x[ 4];  VV(a0);  VV(a1);  a2 ^= cy; }  PP(4);
27     { ulong cy = x[ 5];  VV(a0);  VV(a1);  a2 ^= cy; }  PP(5);
28     { ulong cy = x[ 6];  VV(a0);  VV(a1);  a2 ^= cy; }  PP(6);
29     { ulong cy = x[ 7];  VV(a0);  VV(a1);  VV(a2);  a3 = cy; }  PP(7);
30     { ulong cy = x[ 8];  VV(a0);  VV(a1);  VV(a2);  a3 ^= cy; }  PP(8);
31     { ulong cy = x[ 9];  VV(a0);  VV(a1);  VV(a2);  a3 ^= cy; }  PP(9);
32     { ulong cy = x[10];  VV(a0);  VV(a1);  VV(a2);  a3 ^= cy; }  PP(10);
33     { ulong cy = x[11];  VV(a0);  VV(a1);  VV(a2);  a3 ^= cy; }  PP(11);
34     { ulong cy = x[12];  VV(a0);  VV(a1);  VV(a2);  a3 ^= cy; }  PP(12);
35     { ulong cy = x[13];  VV(a0);  VV(a1);  VV(a2);  a3 ^= cy; }  PP(13);
36     { ulong cy = x[14];  VV(a0);  VV(a1);  VV(a2);  a3 ^= cy; }  PP(14);
37 #undef VV
38 #undef PP
39 
40     ulong b = bit_count(a0);
41     b += (bit_count(a1)<<1);
42     b += (bit_count(a2)<<2);
43     b += (bit_count(a3)<<3);
44     return  b;
45 }
46 // -------------------------
47 
48 ulong
bit_count_v(const ulong * x,ulong n)49 bit_count_v(const ulong *x, ulong n)
50 // Return sum(j=0, n-1, bit_count(x[j]) )
51 {
52     ulong b = 0;
53     const ulong *xe = x + n + 1;
54     while ( x+15 < xe )  // process blocks of 15 elements
55     {
56         b += bit_count_v15(x);
57         x += 15;
58     }
59 
60     // process remaining elements:
61     const ulong r = (ulong)(xe-x-1);
62     for (ulong k=0; k<r; ++k)  b+=bit_count(x[k]);
63 
64     return  b;
65 }
66 // -------------------------
67