1 /*  This file is part of Jellyfish.
2 
3     This work is dual-licensed under 3-Clause BSD License or GPL 3.0.
4     You can choose between one of them if you use this work.
5 
6 `SPDX-License-Identifier: BSD-3-Clause OR  GPL-3.0`
7 */
8 
9 #ifndef __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__
10 #define __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__
11 
12 #include <stdint.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <assert.h>
16 #include <time.h>
17 #include <jellyfish/misc.hpp>
18 #include <iostream>
19 #include <exception>
20 #include <stdexcept>
21 #include <vector>
22 #include <limits>
23 
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 
28 // Column major representation
29 //
30 // Rectangular matrices on Z/2Z of size _r x _c where 1<=_r<=64 (_c is
31 // not limited) and _r <= _c. I.e., the matrix can be stored as an
32 // array of 64 bit word, each representing a column (the highest 64-_r
33 // bits of each word are set to 0).
34 //
35 // Multiplication between a matrix and vector of size _c x 1 gives a
36 // vector of size _r x 1 stored as one 64 bit word. A matrix with a
37 // NULL _columns pointer behaves like the identity.
38 
39 namespace jellyfish {
40   class RectangularBinaryMatrix {
RectangularBinaryMatrix(unsigned int c)41     explicit RectangularBinaryMatrix(unsigned int c)
42       : _columns(NULL)
43       , _r(c)
44       , _c(c)
45     { }
46 
47   public:
RectangularBinaryMatrix(unsigned int r,unsigned c)48     RectangularBinaryMatrix(unsigned int r, unsigned c)
49       : _columns(alloc(r, c)), _r(r), _c(c) { }
RectangularBinaryMatrix(const RectangularBinaryMatrix & rhs)50     RectangularBinaryMatrix(const RectangularBinaryMatrix &rhs)
51       : _columns(rhs._columns ? alloc(rhs._r, rhs._c) : NULL)
52       , _r(rhs._r)
53       , _c(rhs._c)
54     {
55       if(_columns)
56         memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
57     }
RectangularBinaryMatrix(RectangularBinaryMatrix && rhs)58     RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs)
59       : _columns(rhs._columns)
60       , _r(rhs._r)
61       , _c(rhs._c)
62     {
63       rhs._columns = 0;
64     }
65     // Initialize from raw data. raw must contain at least c words.
66     template<typename T>
RectangularBinaryMatrix(const T & raw,unsigned int r,unsigned c)67     RectangularBinaryMatrix(const T &raw, unsigned int r, unsigned c)
68       : _columns(alloc(r, c)), _r(r), _c(c) {
69       for(unsigned int i = 0; i < _c; ++i)
70         _columns[i] = raw[i] & cmask();
71     }
~RectangularBinaryMatrix()72     ~RectangularBinaryMatrix() {
73       free(_columns);
74     }
75 
identity(unsigned c)76     static RectangularBinaryMatrix identity(unsigned c) {
77       return RectangularBinaryMatrix(c);
78     }
79 
identity(unsigned r,unsigned c)80     static RectangularBinaryMatrix identity(unsigned r, unsigned c) {
81       RectangularBinaryMatrix res(r, c);
82       res.init_low_identity();
83       return res;
84     }
85 
operator =(const RectangularBinaryMatrix & rhs)86     RectangularBinaryMatrix &operator=(const RectangularBinaryMatrix &rhs) {
87       if(_r != rhs._r || _c != rhs._c)
88         throw std::invalid_argument("RHS matrix dimensions do not match");
89       memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
90       return *this;
91     }
operator =(RectangularBinaryMatrix && rhs)92     RectangularBinaryMatrix& operator=(RectangularBinaryMatrix&& rhs) {
93       if(_r != rhs._r || _c != rhs._c)
94         throw std::invalid_argument("RHS matrix dimensions do not match");
95       std::swap(_columns, rhs._columns);
96       return *this;
97     }
98 
operator ==(const RectangularBinaryMatrix & rhs) const99     bool operator==(const RectangularBinaryMatrix &rhs) const {
100       if(_r != rhs._r || _c != rhs._c)
101         return false;
102       if(!_columns || !rhs._columns)
103         return _columns == rhs._columns;
104       return !memcmp(_columns, rhs._columns, sizeof(uint64_t) * _c);
105     }
operator !=(const RectangularBinaryMatrix & rhs) const106     bool operator!=(const RectangularBinaryMatrix &rhs) const {
107       return !(*this == rhs);
108     }
109 
110     // Get i-th column. No check on range
operator [](unsigned int i) const111     uint64_t operator[](unsigned int i) const { return _columns ? _columns[i] : ((uint64_t)1 << i); }
112 
r() const113     unsigned int r() const { return _r; }
c() const114     unsigned int c() const { return _c; }
115 
116     // True if every column is zero
is_zero() const117     bool is_zero() const {
118       uint64_t *p = _columns;
119       while(*p == 0 && p < _columns + _c)
120         ++p;
121       return (p - _columns) == _c;
122     }
123 
124     // Randomize the content of the matrix
randomize(uint64_t (* rng)())125     void randomize(uint64_t (*rng)()) {
126       for(unsigned int i = 0; i < _c; ++i)
127         _columns[i] = rng() & cmask();
128     }
129     //void randomize() { randomize(rng); }
130 
131     // Make and check that the matrix the lower right corner of the
132     // identity.
133     void init_low_identity(bool simplify = true);
134     bool is_low_identity() const;
135 
136     // Left matrix vector multiplication. Type T supports the operator
137     // v[i] to return the i-th 64 bit word of v.
138     template<typename T>
139     uint64_t times_loop(const T &v) const;
140 
141 
142 #ifdef HAVE_SSE
143     // This SSE implementation only works if the number of columns is
144     // even.
145     template<typename T>
146     uint64_t times_sse(const T &v) const;
147 #endif
148 
149 #ifdef HAVE_INT128
150     // Implementation using __int128
151     template<typename T>
152     uint64_t times_128(const T& v) const;
153 #endif
154 
155     template<typename T>
times(const T & v) const156     inline uint64_t times(const T& v) const {
157 #ifdef HAVE_SSE
158       return times_sse(v);
159 #elif HAVE_INT128
160       return times_128(v);
161 #else
162       return times_loop(v);
163 #endif
164     }
165 
166     // Return a matrix which is the "pseudo inverse" of this matrix. It
167     // is assumed that there is above this square matrix an identity
168     // block and a zero so as to make the matrix squared. Raise an
169     // exception std::domain_error if the matrix is singular.
170     RectangularBinaryMatrix pseudo_inverse() const;
171 
172     // Return the multiplication of this and rhs. As in pseudo_inverse,
173     // the two matrices are viewed as being squared, padded above by the
174     // identity.
175     RectangularBinaryMatrix pseudo_multiplication(const RectangularBinaryMatrix &rhs) const;
176 
177     // Initialize the object with a pseudo-invertible matrix and return its pseudo-inverse
178     RectangularBinaryMatrix randomize_pseudo_inverse(uint64_t (*rng)());
randomize_pseudo_inverse()179     RectangularBinaryMatrix randomize_pseudo_inverse() { return randomize_pseudo_inverse(random_bits); }
180 
181     // Return the rank of the matrix. The matrix is assumed to be
182     // squared, padded above by the identity.
183     unsigned int pseudo_rank() const;
184 
185     // Display matrix
186     void print(std::ostream &os) const;
187     template<typename T>
188     void print_vector(std::ostream &os, const T &v) const;
189 
190     // Nb words in vector for multiplication
nb_words() const191     uint64_t nb_words() const { return (_c >> 6) + ((_c & 0x3f) != 0); }
192     // Mask of most significant bit in most significant word of a vector
193     // with _c rows.
msb() const194     uint64_t msb() const {
195       int shift = _c & 0x3f;
196       if(shift == 0)
197         shift = sizeof(uint64_t) * 8;
198       return (uint64_t)1 << (shift - 1);
199     }
200 
201   private:
202     // Store column by column. A column may use one word.  By
203     // convention, the "unused" bits (most significant bits) of each
204     // column are set to 0.
205     uint64_t *         _columns;
206     const unsigned int _r, _c;
207 
208     static uint64_t *alloc(unsigned int r, unsigned int c) __attribute__((malloc));
209     // Mask for column word (zero msb)
cmask() const210     uint64_t cmask() const { return std::numeric_limits<uint64_t>::max() >> (std::numeric_limits<uint64_t>::digits - _r); }
211     // Mask of highest word of a vector with _c rows (Most Significant
212     // Word)
msw() const213     uint64_t msw() const { return (msb() << 1) - 1; }
214     // Nb of bits used in highest word of vector with _c rows.
nb_msb() const215     uint64_t nb_msb() const {
216       uint64_t nb = _c & 0x3f;
217       return nb ? nb : sizeof(uint64_t) * 8;
218     }
219     // Allow to change the matrix vectors. No check on i.
get(unsigned int i)220     uint64_t & get(unsigned int i) { return _columns[i]; }
221   };
222 
223   template<typename T>
times_loop(const T & v) const224   uint64_t RectangularBinaryMatrix::times_loop(const T &v) const {
225     if(!_columns) return v[0] & cmask();
226     uint64_t       *p   = _columns + _c - 1;
227     uint64_t        res = 0, x = 0, j = 0;
228     const uint64_t  one = (uint64_t)1;
229 
230     for(unsigned int i = 0; i < nb_words(); ++i) {
231       j = sizeof(uint64_t) * 8;
232       x = v[i];
233       if(i == nb_words() - 1) {
234         x &= msw();
235         j  = nb_msb();
236       }
237       for( ; j > 7; j -= 8, p -= 8) {
238         res ^= (-(x & one)) & p[0];  x >>= 1;
239         res ^= (-(x & one)) & p[-1]; x >>= 1;
240         res ^= (-(x & one)) & p[-2]; x >>= 1;
241         res ^= (-(x & one)) & p[-3]; x >>= 1;
242         res ^= (-(x & one)) & p[-4]; x >>= 1;
243         res ^= (-(x & one)) & p[-5]; x >>= 1;
244         res ^= (-(x & one)) & p[-6]; x >>= 1;
245         res ^= (-(x & one)) & p[-7]; x >>= 1;
246       }
247     }
248 
249     // Finish the loop
250     switch(j) {
251     case 7: res ^= (-(x & one)) & *p--; x >>= 1;
252     case 6: res ^= (-(x & one)) & *p--; x >>= 1;
253     case 5: res ^= (-(x & one)) & *p--; x >>= 1;
254     case 4: res ^= (-(x & one)) & *p--; x >>= 1;
255     case 3: res ^= (-(x & one)) & *p--; x >>= 1;
256     case 2: res ^= (-(x & one)) & *p--; x >>= 1;
257     case 1: res ^= (-(x & one)) & *p;
258     }
259 
260     return res;
261   }
262 
263 #ifdef HAVE_SSE
264   template<typename T>
times_sse(const T & v) const265   uint64_t RectangularBinaryMatrix::times_sse(const T &v) const {
266     if(!_columns) return v[0] & cmask();
267 #define FFs ((uint64_t)-1)
268     static const uint64_t smear[8] asm("smear") __attribute__ ((aligned(16),used)) =
269       {0, 0, 0, FFs, FFs, 0, FFs, FFs};
270     typedef uint64_t xmm_t __attribute__((vector_size(16)));
271 
272     uint64_t *p = _columns + _c - 8;
273 
274     // //#ifdef __ICC
275     // register xmm_t acc;
276     // register xmm_t load;
277     // memset(&acc, '\0', 16);
278     // memset(&load, '\0', 16);
279     // #else
280 #ifdef __clang__
281 #pragma clang diagnostic push
282 #pragma clang diagnostic ignored "-Wuninitialized"
283 #endif
284     xmm_t acc  = acc ^ acc; // Set acc to 0
285     xmm_t load = load ^ load;
286 #ifdef __clang__
287 #pragma clang diagnostic pop
288 #endif
289     // #endif
290 
291 //     // Zero out acc
292 // #pragma GCC diagnostic push
293 // #pragma GCC diagnostic ignored "-Wuninitialized"
294 //     asm("pxor %0,%0\n\t" : "=x"(acc) : "0"(acc));
295 //     asm("pxor %0,%0\n\t" : "=x"(load) : "0"(load));
296 // #pragma GCC diagnostic pop
297 
298     // i is the lower 2 bits of x, and an index into the smear array. Compute res ^= smear[i] & p[j].
299 #define AND_XOR(off)                                                    \
300     asm("movdqa (%[s],%[i]), %[load]\n\t"                               \
301         "pand " off "(%[p]),%[load]\n\t"                                \
302         "pxor %[load],%[acc]\n\t"                                       \
303         : [acc]"=&x"(acc)                                               \
304         : "[acc]"(acc),  [i]"r"(i), [p]"r"(p), [s]"r"(smear), [load]"x"(load))
305 
306 
307     uint64_t i, j = 0, x = 0;
308     for(unsigned int w = 0; w < nb_words(); ++w) {
309       x = v[w];
310       j = sizeof(uint64_t) * 8;
311       if(w == nb_words() - 1) {
312         x &= msw();
313         j  = nb_msb();
314       }
315       for( ; j > 7; j -= 8, p -= 8) {
316         i = (x & (uint64_t)0x3) << 4;
317         AND_XOR("0x30");
318         x >>= 2;
319         i = (x & (uint64_t)0x3) << 4;
320         AND_XOR("0x20");
321         x >>= 2;
322         i = (x & (uint64_t)0x3) << 4;
323         AND_XOR("0x10");
324         x >>= 2;
325         i = (x & (uint64_t)0x3) << 4;
326         AND_XOR("");
327         x >>= 2;
328       }
329     }
330 
331     // Finish loop
332     p = _columns;
333     switch(j) {
334     case 6:
335       i = (x & (uint64_t)0x3) << 4;
336       AND_XOR("0x20");
337       x >>= 2;
338     case 4:
339       i = (x & (uint64_t)0x3) << 4;
340       AND_XOR("0x10");
341       x >>= 2;
342     case 2:
343       i = (x & (uint64_t)0x3) << 4;
344       AND_XOR("");
345     }
346 
347     // Get result out
348     uint64_t res1, res2;
349     asm("movd %[acc], %[res1]\n\t"
350         "psrldq $8, %[acc]\n\t"
351         "movd %[acc], %[res2]\n\t"
352         : [res1]"=r"(res1), [res2]"=r"(res2)
353         : [acc]"x"(acc));
354     return res1 ^ res2;
355   }
356 #endif // HAVE_SSE
357 
358 #ifdef HAVE_INT128
359   template<typename T>
times_128(const T & v) const360   uint64_t RectangularBinaryMatrix::times_128(const T &v) const {
361     if(!_columns) return v[0] & cmask();
362     typedef unsigned __int128 u128;
363     static const u128 smear[4] =
364       { (u128)0,
365         (((u128)1 << 64) - 1) << 64,
366         ((u128)1 << 64) - 1,
367         (u128)-1
368       };
369     u128* p   = (u128*)(_columns + _c - 2);
370     u128 res = 0;
371     //    u128  res = res ^ res;
372 
373     uint64_t j = 0, x = 0;
374     for(unsigned int w = 0; w < nb_words(); ++w) {
375       x = v[w];
376       j = sizeof(uint64_t) * 8;
377       if(w == nb_words() - 1) {
378         x &= msw();
379         j  = nb_msb();
380       }
381       for( ; j > 7; j -= 8, p -= 4) {
382         res ^= smear[x & (uint64_t)0x3] & p[ 0]; x >>= 2;
383         res ^= smear[x & (uint64_t)0x3] & p[-1]; x >>= 2;
384         res ^= smear[x & (uint64_t)0x3] & p[-2]; x >>= 2;
385         res ^= smear[x & (uint64_t)0x3] & p[-3]; x >>= 2;
386       }
387     }
388 
389     switch(j) {
390     case 6: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2;
391     case 4: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2;
392     case 2: res ^= smear[x & (uint64_t)0x3] & *p;
393     }
394 
395     return (res ^ (res >> 64)) & smear[2];
396   }
397 #endif // HAVE_INT128
398 
399 }
400 
401 #endif
402