1 // This file is part of the FXT library.
2 // Copyright (C) 2010, 2011, 2012 Joerg Arndt
3 // License: GNU General Public License version 3 or later,
4 // see the file COPYING.txt in the main directory.
5
6 #include "bits/bitcount.h"
7 #include "fxttypes.h"
8
9 //#include "bits/print-bin.h"
10 //#include "fxtio.h"
11
bit_count_v15(const ulong * x)12 static inline ulong bit_count_v15(const ulong *x)
13 // Return sum(j=0, 14, bit_count(x[j]) )
14 // Technique is "vertical" addition.
15 {
16 //#define PP(k) { cout << "x[" << setw(2) << k << "]="; print_bin("", x[k], 8); print_bin(" a0=", a0, 8); print_bin(" a1=", a1, 8); print_bin(" a2=", a2, 8); print_bin(" a3=", a3, 8); cout << endl; }
17 #define PP(k)
18
19 #define VV(A) { ulong t = A & cy; A ^= cy; cy = t; }
20 ulong a1, a2, a3;
21 // a1= a2 = a3 = 0; // initialization only needed with printing
22 ulong a0=x[0]; PP(0);
23 { ulong cy = x[ 1]; VV(a0); a1 = cy; } PP(1);
24 { ulong cy = x[ 2]; VV(a0); a1 ^= cy; } PP(2);
25 { ulong cy = x[ 3]; VV(a0); VV(a1); a2 = cy; } PP(3);
26 { ulong cy = x[ 4]; VV(a0); VV(a1); a2 ^= cy; } PP(4);
27 { ulong cy = x[ 5]; VV(a0); VV(a1); a2 ^= cy; } PP(5);
28 { ulong cy = x[ 6]; VV(a0); VV(a1); a2 ^= cy; } PP(6);
29 { ulong cy = x[ 7]; VV(a0); VV(a1); VV(a2); a3 = cy; } PP(7);
30 { ulong cy = x[ 8]; VV(a0); VV(a1); VV(a2); a3 ^= cy; } PP(8);
31 { ulong cy = x[ 9]; VV(a0); VV(a1); VV(a2); a3 ^= cy; } PP(9);
32 { ulong cy = x[10]; VV(a0); VV(a1); VV(a2); a3 ^= cy; } PP(10);
33 { ulong cy = x[11]; VV(a0); VV(a1); VV(a2); a3 ^= cy; } PP(11);
34 { ulong cy = x[12]; VV(a0); VV(a1); VV(a2); a3 ^= cy; } PP(12);
35 { ulong cy = x[13]; VV(a0); VV(a1); VV(a2); a3 ^= cy; } PP(13);
36 { ulong cy = x[14]; VV(a0); VV(a1); VV(a2); a3 ^= cy; } PP(14);
37 #undef VV
38 #undef PP
39
40 ulong b = bit_count(a0);
41 b += (bit_count(a1)<<1);
42 b += (bit_count(a2)<<2);
43 b += (bit_count(a3)<<3);
44 return b;
45 }
46 // -------------------------
47
48 ulong
bit_count_v(const ulong * x,ulong n)49 bit_count_v(const ulong *x, ulong n)
50 // Return sum(j=0, n-1, bit_count(x[j]) )
51 {
52 ulong b = 0;
53 const ulong *xe = x + n + 1;
54 while ( x+15 < xe ) // process blocks of 15 elements
55 {
56 b += bit_count_v15(x);
57 x += 15;
58 }
59
60 // process remaining elements:
61 const ulong r = (ulong)(xe-x-1);
62 for (ulong k=0; k<r; ++k) b+=bit_count(x[k]);
63
64 return b;
65 }
66 // -------------------------
67