1 /* This file is part of Jellyfish. 2 3 This work is dual-licensed under 3-Clause BSD License or GPL 3.0. 4 You can choose between one of them if you use this work. 5 6 `SPDX-License-Identifier: BSD-3-Clause OR GPL-3.0` 7 */ 8 9 #ifndef __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__ 10 #define __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__ 11 12 #include <stdint.h> 13 #include <stdlib.h> 14 #include <string.h> 15 #include <assert.h> 16 #include <time.h> 17 #include <jellyfish/misc.hpp> 18 #include <iostream> 19 #include <exception> 20 #include <stdexcept> 21 #include <vector> 22 #include <limits> 23 24 #ifdef HAVE_CONFIG_H 25 #include <config.h> 26 #endif 27 28 // Column major representation 29 // 30 // Rectangular matrices on Z/2Z of size _r x _c where 1<=_r<=64 (_c is 31 // not limited) and _r <= _c. I.e., the matrix can be stored as an 32 // array of 64 bit word, each representing a column (the highest 64-_r 33 // bits of each word are set to 0). 34 // 35 // Multiplication between a matrix and vector of size _c x 1 gives a 36 // vector of size _r x 1 stored as one 64 bit word. A matrix with a 37 // NULL _columns pointer behaves like the identity. 38 39 namespace jellyfish { 40 class RectangularBinaryMatrix { RectangularBinaryMatrix(unsigned int c)41 explicit RectangularBinaryMatrix(unsigned int c) 42 : _columns(NULL) 43 , _r(c) 44 , _c(c) 45 { } 46 47 public: RectangularBinaryMatrix(unsigned int r,unsigned c)48 RectangularBinaryMatrix(unsigned int r, unsigned c) 49 : _columns(alloc(r, c)), _r(r), _c(c) { } RectangularBinaryMatrix(const RectangularBinaryMatrix & rhs)50 RectangularBinaryMatrix(const RectangularBinaryMatrix &rhs) 51 : _columns(rhs._columns ? alloc(rhs._r, rhs._c) : NULL) 52 , _r(rhs._r) 53 , _c(rhs._c) 54 { 55 if(_columns) 56 memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c); 57 } RectangularBinaryMatrix(RectangularBinaryMatrix && rhs)58 RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs) 59 : _columns(rhs._columns) 60 , _r(rhs._r) 61 , _c(rhs._c) 62 { 63 rhs._columns = 0; 64 } 65 // Initialize from raw data. raw must contain at least c words. 66 template<typename T> RectangularBinaryMatrix(const T & raw,unsigned int r,unsigned c)67 RectangularBinaryMatrix(const T &raw, unsigned int r, unsigned c) 68 : _columns(alloc(r, c)), _r(r), _c(c) { 69 for(unsigned int i = 0; i < _c; ++i) 70 _columns[i] = raw[i] & cmask(); 71 } ~RectangularBinaryMatrix()72 ~RectangularBinaryMatrix() { 73 free(_columns); 74 } 75 identity(unsigned c)76 static RectangularBinaryMatrix identity(unsigned c) { 77 return RectangularBinaryMatrix(c); 78 } 79 identity(unsigned r,unsigned c)80 static RectangularBinaryMatrix identity(unsigned r, unsigned c) { 81 RectangularBinaryMatrix res(r, c); 82 res.init_low_identity(); 83 return res; 84 } 85 operator =(const RectangularBinaryMatrix & rhs)86 RectangularBinaryMatrix &operator=(const RectangularBinaryMatrix &rhs) { 87 if(_r != rhs._r || _c != rhs._c) 88 throw std::invalid_argument("RHS matrix dimensions do not match"); 89 memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c); 90 return *this; 91 } operator =(RectangularBinaryMatrix && rhs)92 RectangularBinaryMatrix& operator=(RectangularBinaryMatrix&& rhs) { 93 if(_r != rhs._r || _c != rhs._c) 94 throw std::invalid_argument("RHS matrix dimensions do not match"); 95 std::swap(_columns, rhs._columns); 96 return *this; 97 } 98 operator ==(const RectangularBinaryMatrix & rhs) const99 bool operator==(const RectangularBinaryMatrix &rhs) const { 100 if(_r != rhs._r || _c != rhs._c) 101 return false; 102 if(!_columns || !rhs._columns) 103 return _columns == rhs._columns; 104 return !memcmp(_columns, rhs._columns, sizeof(uint64_t) * _c); 105 } operator !=(const RectangularBinaryMatrix & rhs) const106 bool operator!=(const RectangularBinaryMatrix &rhs) const { 107 return !(*this == rhs); 108 } 109 110 // Get i-th column. No check on range operator [](unsigned int i) const111 uint64_t operator[](unsigned int i) const { return _columns ? _columns[i] : ((uint64_t)1 << i); } 112 r() const113 unsigned int r() const { return _r; } c() const114 unsigned int c() const { return _c; } 115 116 // True if every column is zero is_zero() const117 bool is_zero() const { 118 uint64_t *p = _columns; 119 while(*p == 0 && p < _columns + _c) 120 ++p; 121 return (p - _columns) == _c; 122 } 123 124 // Randomize the content of the matrix randomize(uint64_t (* rng)())125 void randomize(uint64_t (*rng)()) { 126 for(unsigned int i = 0; i < _c; ++i) 127 _columns[i] = rng() & cmask(); 128 } 129 //void randomize() { randomize(rng); } 130 131 // Make and check that the matrix the lower right corner of the 132 // identity. 133 void init_low_identity(bool simplify = true); 134 bool is_low_identity() const; 135 136 // Left matrix vector multiplication. Type T supports the operator 137 // v[i] to return the i-th 64 bit word of v. 138 template<typename T> 139 uint64_t times_loop(const T &v) const; 140 141 142 #ifdef HAVE_SSE 143 // This SSE implementation only works if the number of columns is 144 // even. 145 template<typename T> 146 uint64_t times_sse(const T &v) const; 147 #endif 148 149 #ifdef HAVE_INT128 150 // Implementation using __int128 151 template<typename T> 152 uint64_t times_128(const T& v) const; 153 #endif 154 155 template<typename T> times(const T & v) const156 inline uint64_t times(const T& v) const { 157 #ifdef HAVE_SSE 158 return times_sse(v); 159 #elif HAVE_INT128 160 return times_128(v); 161 #else 162 return times_loop(v); 163 #endif 164 } 165 166 // Return a matrix which is the "pseudo inverse" of this matrix. It 167 // is assumed that there is above this square matrix an identity 168 // block and a zero so as to make the matrix squared. Raise an 169 // exception std::domain_error if the matrix is singular. 170 RectangularBinaryMatrix pseudo_inverse() const; 171 172 // Return the multiplication of this and rhs. As in pseudo_inverse, 173 // the two matrices are viewed as being squared, padded above by the 174 // identity. 175 RectangularBinaryMatrix pseudo_multiplication(const RectangularBinaryMatrix &rhs) const; 176 177 // Initialize the object with a pseudo-invertible matrix and return its pseudo-inverse 178 RectangularBinaryMatrix randomize_pseudo_inverse(uint64_t (*rng)()); randomize_pseudo_inverse()179 RectangularBinaryMatrix randomize_pseudo_inverse() { return randomize_pseudo_inverse(random_bits); } 180 181 // Return the rank of the matrix. The matrix is assumed to be 182 // squared, padded above by the identity. 183 unsigned int pseudo_rank() const; 184 185 // Display matrix 186 void print(std::ostream &os) const; 187 template<typename T> 188 void print_vector(std::ostream &os, const T &v) const; 189 190 // Nb words in vector for multiplication nb_words() const191 uint64_t nb_words() const { return (_c >> 6) + ((_c & 0x3f) != 0); } 192 // Mask of most significant bit in most significant word of a vector 193 // with _c rows. msb() const194 uint64_t msb() const { 195 int shift = _c & 0x3f; 196 if(shift == 0) 197 shift = sizeof(uint64_t) * 8; 198 return (uint64_t)1 << (shift - 1); 199 } 200 201 private: 202 // Store column by column. A column may use one word. By 203 // convention, the "unused" bits (most significant bits) of each 204 // column are set to 0. 205 uint64_t * _columns; 206 const unsigned int _r, _c; 207 208 static uint64_t *alloc(unsigned int r, unsigned int c) __attribute__((malloc)); 209 // Mask for column word (zero msb) cmask() const210 uint64_t cmask() const { return std::numeric_limits<uint64_t>::max() >> (std::numeric_limits<uint64_t>::digits - _r); } 211 // Mask of highest word of a vector with _c rows (Most Significant 212 // Word) msw() const213 uint64_t msw() const { return (msb() << 1) - 1; } 214 // Nb of bits used in highest word of vector with _c rows. nb_msb() const215 uint64_t nb_msb() const { 216 uint64_t nb = _c & 0x3f; 217 return nb ? nb : sizeof(uint64_t) * 8; 218 } 219 // Allow to change the matrix vectors. No check on i. get(unsigned int i)220 uint64_t & get(unsigned int i) { return _columns[i]; } 221 }; 222 223 template<typename T> times_loop(const T & v) const224 uint64_t RectangularBinaryMatrix::times_loop(const T &v) const { 225 if(!_columns) return v[0] & cmask(); 226 uint64_t *p = _columns + _c - 1; 227 uint64_t res = 0, x = 0, j = 0; 228 const uint64_t one = (uint64_t)1; 229 230 for(unsigned int i = 0; i < nb_words(); ++i) { 231 j = sizeof(uint64_t) * 8; 232 x = v[i]; 233 if(i == nb_words() - 1) { 234 x &= msw(); 235 j = nb_msb(); 236 } 237 for( ; j > 7; j -= 8, p -= 8) { 238 res ^= (-(x & one)) & p[0]; x >>= 1; 239 res ^= (-(x & one)) & p[-1]; x >>= 1; 240 res ^= (-(x & one)) & p[-2]; x >>= 1; 241 res ^= (-(x & one)) & p[-3]; x >>= 1; 242 res ^= (-(x & one)) & p[-4]; x >>= 1; 243 res ^= (-(x & one)) & p[-5]; x >>= 1; 244 res ^= (-(x & one)) & p[-6]; x >>= 1; 245 res ^= (-(x & one)) & p[-7]; x >>= 1; 246 } 247 } 248 249 // Finish the loop 250 switch(j) { 251 case 7: res ^= (-(x & one)) & *p--; x >>= 1; 252 case 6: res ^= (-(x & one)) & *p--; x >>= 1; 253 case 5: res ^= (-(x & one)) & *p--; x >>= 1; 254 case 4: res ^= (-(x & one)) & *p--; x >>= 1; 255 case 3: res ^= (-(x & one)) & *p--; x >>= 1; 256 case 2: res ^= (-(x & one)) & *p--; x >>= 1; 257 case 1: res ^= (-(x & one)) & *p; 258 } 259 260 return res; 261 } 262 263 #ifdef HAVE_SSE 264 template<typename T> times_sse(const T & v) const265 uint64_t RectangularBinaryMatrix::times_sse(const T &v) const { 266 if(!_columns) return v[0] & cmask(); 267 #define FFs ((uint64_t)-1) 268 static const uint64_t smear[8] asm("smear") __attribute__ ((aligned(16),used)) = 269 {0, 0, 0, FFs, FFs, 0, FFs, FFs}; 270 typedef uint64_t xmm_t __attribute__((vector_size(16))); 271 272 uint64_t *p = _columns + _c - 8; 273 274 // //#ifdef __ICC 275 // register xmm_t acc; 276 // register xmm_t load; 277 // memset(&acc, '\0', 16); 278 // memset(&load, '\0', 16); 279 // #else 280 #ifdef __clang__ 281 #pragma clang diagnostic push 282 #pragma clang diagnostic ignored "-Wuninitialized" 283 #endif 284 xmm_t acc = acc ^ acc; // Set acc to 0 285 xmm_t load = load ^ load; 286 #ifdef __clang__ 287 #pragma clang diagnostic pop 288 #endif 289 // #endif 290 291 // // Zero out acc 292 // #pragma GCC diagnostic push 293 // #pragma GCC diagnostic ignored "-Wuninitialized" 294 // asm("pxor %0,%0\n\t" : "=x"(acc) : "0"(acc)); 295 // asm("pxor %0,%0\n\t" : "=x"(load) : "0"(load)); 296 // #pragma GCC diagnostic pop 297 298 // i is the lower 2 bits of x, and an index into the smear array. Compute res ^= smear[i] & p[j]. 299 #define AND_XOR(off) \ 300 asm("movdqa (%[s],%[i]), %[load]\n\t" \ 301 "pand " off "(%[p]),%[load]\n\t" \ 302 "pxor %[load],%[acc]\n\t" \ 303 : [acc]"=&x"(acc) \ 304 : "[acc]"(acc), [i]"r"(i), [p]"r"(p), [s]"r"(smear), [load]"x"(load)) 305 306 307 uint64_t i, j = 0, x = 0; 308 for(unsigned int w = 0; w < nb_words(); ++w) { 309 x = v[w]; 310 j = sizeof(uint64_t) * 8; 311 if(w == nb_words() - 1) { 312 x &= msw(); 313 j = nb_msb(); 314 } 315 for( ; j > 7; j -= 8, p -= 8) { 316 i = (x & (uint64_t)0x3) << 4; 317 AND_XOR("0x30"); 318 x >>= 2; 319 i = (x & (uint64_t)0x3) << 4; 320 AND_XOR("0x20"); 321 x >>= 2; 322 i = (x & (uint64_t)0x3) << 4; 323 AND_XOR("0x10"); 324 x >>= 2; 325 i = (x & (uint64_t)0x3) << 4; 326 AND_XOR(""); 327 x >>= 2; 328 } 329 } 330 331 // Finish loop 332 p = _columns; 333 switch(j) { 334 case 6: 335 i = (x & (uint64_t)0x3) << 4; 336 AND_XOR("0x20"); 337 x >>= 2; 338 case 4: 339 i = (x & (uint64_t)0x3) << 4; 340 AND_XOR("0x10"); 341 x >>= 2; 342 case 2: 343 i = (x & (uint64_t)0x3) << 4; 344 AND_XOR(""); 345 } 346 347 // Get result out 348 uint64_t res1, res2; 349 asm("movd %[acc], %[res1]\n\t" 350 "psrldq $8, %[acc]\n\t" 351 "movd %[acc], %[res2]\n\t" 352 : [res1]"=r"(res1), [res2]"=r"(res2) 353 : [acc]"x"(acc)); 354 return res1 ^ res2; 355 } 356 #endif // HAVE_SSE 357 358 #ifdef HAVE_INT128 359 template<typename T> times_128(const T & v) const360 uint64_t RectangularBinaryMatrix::times_128(const T &v) const { 361 if(!_columns) return v[0] & cmask(); 362 typedef unsigned __int128 u128; 363 static const u128 smear[4] = 364 { (u128)0, 365 (((u128)1 << 64) - 1) << 64, 366 ((u128)1 << 64) - 1, 367 (u128)-1 368 }; 369 u128* p = (u128*)(_columns + _c - 2); 370 u128 res = 0; 371 // u128 res = res ^ res; 372 373 uint64_t j = 0, x = 0; 374 for(unsigned int w = 0; w < nb_words(); ++w) { 375 x = v[w]; 376 j = sizeof(uint64_t) * 8; 377 if(w == nb_words() - 1) { 378 x &= msw(); 379 j = nb_msb(); 380 } 381 for( ; j > 7; j -= 8, p -= 4) { 382 res ^= smear[x & (uint64_t)0x3] & p[ 0]; x >>= 2; 383 res ^= smear[x & (uint64_t)0x3] & p[-1]; x >>= 2; 384 res ^= smear[x & (uint64_t)0x3] & p[-2]; x >>= 2; 385 res ^= smear[x & (uint64_t)0x3] & p[-3]; x >>= 2; 386 } 387 } 388 389 switch(j) { 390 case 6: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2; 391 case 4: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2; 392 case 2: res ^= smear[x & (uint64_t)0x3] & *p; 393 } 394 395 return (res ^ (res >> 64)) & smear[2]; 396 } 397 #endif // HAVE_INT128 398 399 } 400 401 #endif 402