1 /*  This file is part of Jellyfish.
2 
3     Jellyfish is free software: you can redistribute it and/or modify
4     it under the terms of the GNU General Public License as published by
5     the Free Software Foundation, either version 3 of the License, or
6     (at your option) any later version.
7 
8     Jellyfish is distributed in the hope that it will be useful,
9     but WITHOUT ANY WARRANTY; without even the implied warranty of
10     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11     GNU General Public License for more details.
12 
13     You should have received a copy of the GNU General Public License
14     along with Jellyfish.  If not, see <http://www.gnu.org/licenses/>.
15 */
16 
17 #ifndef __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__
18 #define __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__
19 
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <assert.h>
24 #include <time.h>
25 #include <jellyfish/misc.hpp>
26 #include <iostream>
27 #include <exception>
28 #include <stdexcept>
29 #include <vector>
30 #include <limits>
31 
32 #ifdef HAVE_CONFIG_H
33 #include <config.h>
34 #endif
35 
36 // Column major representation
37 //
38 // Rectangular matrices on Z/2Z of size _r x _c where 1<=_r<=64 (_c is
39 // not limited) and _r <= _c. I.e., the matrix can be stored as an
40 // array of 64 bit word, each representing a column (the highest 64-_r
41 // bits of each word are set to 0).
42 //
43 // Multiplication between a matrix and vector of size _c x 1 gives a
44 // vector of size _r x 1 stored as one 64 bit word.
45 
46 namespace jellyfish {
47   class RectangularBinaryMatrix {
48   public:
RectangularBinaryMatrix(unsigned int r,unsigned c)49     RectangularBinaryMatrix(unsigned int r, unsigned c)
50       : _columns(alloc(r, c)), _r(r), _c(c) { }
RectangularBinaryMatrix(const RectangularBinaryMatrix & rhs)51     RectangularBinaryMatrix(const RectangularBinaryMatrix &rhs)
52     : _columns(alloc(rhs._r, rhs._c)), _r(rhs._r), _c(rhs._c) {
53       memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
54     }
RectangularBinaryMatrix(RectangularBinaryMatrix && rhs)55     RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs) :
56     _columns(rhs._columns), _r(rhs._r), _c(rhs._c) {
57       rhs._columns = 0;
58     }
59     // Initialize from raw data. raw must contain at least c words.
60     template<typename T>
RectangularBinaryMatrix(const T & raw,unsigned int r,unsigned c)61     RectangularBinaryMatrix(const T &raw, unsigned int r, unsigned c)
62       : _columns(alloc(r, c)), _r(r), _c(c) {
63       for(unsigned int i = 0; i < _c; ++i)
64         _columns[i] = raw[i] & cmask();
65     }
~RectangularBinaryMatrix()66     ~RectangularBinaryMatrix() {
67       free(_columns);
68     }
69 
operator =(const RectangularBinaryMatrix & rhs)70     RectangularBinaryMatrix &operator=(const RectangularBinaryMatrix &rhs) {
71       if(_r != rhs._r || _c != rhs._c)
72         throw std::invalid_argument("RHS matrix dimensions do not match");
73       memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
74       return *this;
75     }
operator =(RectangularBinaryMatrix && rhs)76     RectangularBinaryMatrix& operator=(RectangularBinaryMatrix&& rhs) {
77       if(_r != rhs._r || _c != rhs._c)
78         throw std::invalid_argument("RHS matrix dimensions do not match");
79       std::swap(_columns, rhs._columns);
80       return *this;
81     }
82 
operator ==(const RectangularBinaryMatrix & rhs) const83     bool operator==(const RectangularBinaryMatrix &rhs) const {
84       if(_r != rhs._r || _c != rhs._c)
85         return false;
86       return !memcmp(_columns, rhs._columns, sizeof(uint64_t) * _c);
87     }
operator !=(const RectangularBinaryMatrix & rhs) const88     bool operator!=(const RectangularBinaryMatrix &rhs) const {
89       return !(*this == rhs);
90     }
91 
92     // Get i-th column. No check on range
operator [](unsigned int i) const93     const uint64_t & operator[](unsigned int i) const { return _columns[i]; }
94 
r() const95     unsigned int r() const { return _r; }
c() const96     unsigned int c() const { return _c; }
97 
98     // True if every column is zero
is_zero() const99     bool is_zero() const {
100       uint64_t *p = _columns;
101       while(*p == 0 && p < _columns + _c)
102         ++p;
103       return (p - _columns) == _c;
104     }
105 
106     // Randomize the content of the matrix
randomize(uint64_t (* rng)())107     void randomize(uint64_t (*rng)()) {
108       for(unsigned int i = 0; i < _c; ++i)
109         _columns[i] = rng() & cmask();
110     }
111     //void randomize() { randomize(rng); }
112 
113     // Make and check that the matrix the lower right corner of the
114     // identity.
115     void init_low_identity();
116     bool is_low_identity();
117 
118     // Left matrix vector multiplication. Type T supports the operator
119     // v[i] to return the i-th 64 bit word of v.
120     template<typename T>
121     uint64_t times_loop(const T &v) const;
122 
123 
124 #ifdef HAVE_SSE
125     // This SSE implementation only works if the number of columns is
126     // even.
127     template<typename T>
128     uint64_t times_sse(const T &v) const;
129 #endif
130 
131 #ifdef HAVE_INT128
132     // Implementation using __int128
133     template<typename T>
134     uint64_t times_128(const T& v) const;
135 #endif
136 
137     template<typename T>
times(const T & v) const138     inline uint64_t times(const T& v) const {
139 #ifdef HAVE_SSE
140       return times_sse(v);
141 #elif HAVE_INT128
142       return times_128(v);
143 #else
144       return times_loop(v);
145 #endif
146     }
147 
148     // Return a matrix which is the "pseudo inverse" of this matrix. It
149     // is assumed that there is above this square matrix an identity
150     // block and a zero so as to make the matrix squared. Raise an
151     // exception std::domain_error if the matrix is singular.
152     RectangularBinaryMatrix pseudo_inverse() const;
153 
154     // Return the multiplication of this and rhs. As in pseudo_inverse,
155     // the two matrices are viewed as being squared, padded above by the
156     // identity.
157     RectangularBinaryMatrix pseudo_multiplication(const RectangularBinaryMatrix &rhs) const;
158 
159     // Initialize the object with a pseudo-invertible matrix and return its pseudo-inverse
160     RectangularBinaryMatrix randomize_pseudo_inverse(uint64_t (*rng)());
randomize_pseudo_inverse()161     RectangularBinaryMatrix randomize_pseudo_inverse() { return randomize_pseudo_inverse(random_bits); }
162 
163     // Return the rank of the matrix. The matrix is assumed to be
164     // squared, padded above by the identity.
165     unsigned int pseudo_rank() const;
166 
167     // Display matrix
168     void print(std::ostream &os) const;
169     template<typename T>
170     void print_vector(std::ostream &os, const T &v) const;
171 
172     // Nb words in vector for multiplication
nb_words() const173     uint64_t nb_words() const { return (_c >> 6) + ((_c & 0x3f) != 0); }
174     // Mask of most significant bit in most significant word of a vector
175     // with _c rows.
msb() const176     uint64_t msb() const {
177       int shift = _c & 0x3f;
178       if(shift == 0)
179         shift = sizeof(uint64_t) * 8;
180       return (uint64_t)1 << (shift - 1);
181     }
182 
183   private:
184     // Store column by column. A column may use one word.  By
185     // convention, the "unused" bits (most significant bits) of each
186     // column are set to 0.
187     uint64_t *         _columns;
188     const unsigned int _r, _c;
189 
190     static uint64_t *alloc(unsigned int r, unsigned int c) __attribute__((malloc));
191     // Mask for column word (zero msb)
cmask() const192     uint64_t cmask() const { return std::numeric_limits<uint64_t>::max() >> (std::numeric_limits<uint64_t>::digits - _r); }
193     // Mask of highest word of a vector with _c rows (Most Significant
194     // Word)
msw() const195     uint64_t msw() const { return (msb() << 1) - 1; }
196     // Nb of bits used in highest word of vector with _c rows.
nb_msb() const197     uint64_t nb_msb() const {
198       uint64_t nb = _c & 0x3f;
199       return nb ? nb : sizeof(uint64_t) * 8;
200     }
201     // Allow to change the matrix vectors. No check on i.
get(unsigned int i)202     uint64_t & get(unsigned int i) { return _columns[i]; }
203   };
204 
205   template<typename T>
times_loop(const T & v) const206   uint64_t RectangularBinaryMatrix::times_loop(const T &v) const {
207     uint64_t       *p   = _columns + _c - 1;
208     uint64_t        res = 0, x = 0, j = 0;
209     const uint64_t  one = (uint64_t)1;
210 
211     for(unsigned int i = 0; i < nb_words(); ++i) {
212       j = sizeof(uint64_t) * 8;
213       x = v[i];
214       if(i == nb_words() - 1) {
215         x &= msw();
216         j  = nb_msb();
217       }
218       for( ; j > 7; j -= 8, p -= 8) {
219         res ^= (-(x & one)) & p[0];  x >>= 1;
220         res ^= (-(x & one)) & p[-1]; x >>= 1;
221         res ^= (-(x & one)) & p[-2]; x >>= 1;
222         res ^= (-(x & one)) & p[-3]; x >>= 1;
223         res ^= (-(x & one)) & p[-4]; x >>= 1;
224         res ^= (-(x & one)) & p[-5]; x >>= 1;
225         res ^= (-(x & one)) & p[-6]; x >>= 1;
226         res ^= (-(x & one)) & p[-7]; x >>= 1;
227       }
228     }
229 
230     // Finish the loop
231     switch(j) {
232     case 7: res ^= (-(x & one)) & *p--; x >>= 1;
233     case 6: res ^= (-(x & one)) & *p--; x >>= 1;
234     case 5: res ^= (-(x & one)) & *p--; x >>= 1;
235     case 4: res ^= (-(x & one)) & *p--; x >>= 1;
236     case 3: res ^= (-(x & one)) & *p--; x >>= 1;
237     case 2: res ^= (-(x & one)) & *p--; x >>= 1;
238     case 1: res ^= (-(x & one)) & *p;
239     }
240 
241     return res;
242   }
243 
244 #ifdef HAVE_SSE
245   template<typename T>
times_sse(const T & v) const246   uint64_t RectangularBinaryMatrix::times_sse(const T &v) const {
247 #define FFs ((uint64_t)-1)
248     static const uint64_t smear[8] asm("smear") __attribute__ ((aligned(16),used)) =
249       {0, 0, 0, FFs, FFs, 0, FFs, FFs};
250     typedef uint64_t xmm_t __attribute__((vector_size(16)));
251 
252     uint64_t *p = _columns + _c - 8;
253 
254     // //#ifdef __ICC
255     // register xmm_t acc;
256     // register xmm_t load;
257     // memset(&acc, '\0', 16);
258     // memset(&load, '\0', 16);
259     // #else
260 #ifdef __clang__
261 #pragma clang diagnostic push
262 #pragma clang diagnostic ignored "-Wuninitialized"
263 #endif
264     xmm_t acc  = acc ^ acc; // Set acc to 0
265     xmm_t load = load ^ load;
266 #ifdef __clang__
267 #pragma clang diagnostic pop
268 #endif
269     // #endif
270 
271 //     // Zero out acc
272 // #pragma GCC diagnostic push
273 // #pragma GCC diagnostic ignored "-Wuninitialized"
274 //     asm("pxor %0,%0\n\t" : "=x"(acc) : "0"(acc));
275 //     asm("pxor %0,%0\n\t" : "=x"(load) : "0"(load));
276 // #pragma GCC diagnostic pop
277 
278     // i is the lower 2 bits of x, and an index into the smear array. Compute res ^= smear[i] & p[j].
279 #define AND_XOR(off)                                                    \
280     asm("movdqa (%[s],%[i]), %[load]\n\t"                               \
281         "pand " off "(%[p]),%[load]\n\t"                                \
282         "pxor %[load],%[acc]\n\t"                                       \
283         : [acc]"=&x"(acc)                                               \
284         : "[acc]"(acc),  [i]"r"(i), [p]"r"(p), [s]"r"(smear), [load]"x"(load))
285 
286 
287     uint64_t i, j = 0, x = 0;
288     for(unsigned int w = 0; w < nb_words(); ++w) {
289       x = v[w];
290       j = sizeof(uint64_t) * 8;
291       if(w == nb_words() - 1) {
292         x &= msw();
293         j  = nb_msb();
294       }
295       for( ; j > 7; j -= 8, p -= 8) {
296         i = (x & (uint64_t)0x3) << 4;
297         AND_XOR("0x30");
298         x >>= 2;
299         i = (x & (uint64_t)0x3) << 4;
300         AND_XOR("0x20");
301         x >>= 2;
302         i = (x & (uint64_t)0x3) << 4;
303         AND_XOR("0x10");
304         x >>= 2;
305         i = (x & (uint64_t)0x3) << 4;
306         AND_XOR("");
307         x >>= 2;
308       }
309     }
310 
311     // Finish loop
312     p = _columns;
313     switch(j) {
314     case 6:
315       i = (x & (uint64_t)0x3) << 4;
316       AND_XOR("0x20");
317       x >>= 2;
318     case 4:
319       i = (x & (uint64_t)0x3) << 4;
320       AND_XOR("0x10");
321       x >>= 2;
322     case 2:
323       i = (x & (uint64_t)0x3) << 4;
324       AND_XOR("");
325     }
326 
327     // Get result out
328     uint64_t res1, res2;
329     asm("movd %[acc], %[res1]\n\t"
330         "psrldq $8, %[acc]\n\t"
331         "movd %[acc], %[res2]\n\t"
332         : [res1]"=r"(res1), [res2]"=r"(res2)
333         : [acc]"x"(acc));
334     return res1 ^ res2;
335   }
336 #endif // HAVE_SSE
337 
338 #ifdef HAVE_INT128
339   template<typename T>
times_128(const T & v) const340   uint64_t RectangularBinaryMatrix::times_128(const T &v) const {
341     typedef unsigned __int128 u128;
342     static const u128 smear[4] =
343       { (u128)0,
344         (((u128)1 << 64) - 1) << 64,
345         ((u128)1 << 64) - 1,
346         (u128)-1
347       };
348     u128* p   = (u128*)(_columns + _c - 2);
349     u128 res = 0;
350     //    u128  res = res ^ res;
351 
352     uint64_t j = 0, x = 0;
353     for(unsigned int w = 0; w < nb_words(); ++w) {
354       x = v[w];
355       j = sizeof(uint64_t) * 8;
356       if(w == nb_words() - 1) {
357         x &= msw();
358         j  = nb_msb();
359       }
360       for( ; j > 7; j -= 8, p -= 4) {
361         res ^= smear[x & (uint64_t)0x3] & p[ 0]; x >>= 2;
362         res ^= smear[x & (uint64_t)0x3] & p[-1]; x >>= 2;
363         res ^= smear[x & (uint64_t)0x3] & p[-2]; x >>= 2;
364         res ^= smear[x & (uint64_t)0x3] & p[-3]; x >>= 2;
365       }
366     }
367 
368     switch(j) {
369     case 6: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2;
370     case 4: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2;
371     case 2: res ^= smear[x & (uint64_t)0x3] & *p;
372     }
373 
374     return (res ^ (res >> 64)) & smear[2];
375   }
376 #endif // HAVE_INT128
377 
378 }
379 
380 #endif
381