1 /* This file is part of Jellyfish. 2 3 Jellyfish is free software: you can redistribute it and/or modify 4 it under the terms of the GNU General Public License as published by 5 the Free Software Foundation, either version 3 of the License, or 6 (at your option) any later version. 7 8 Jellyfish is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with Jellyfish. If not, see <http://www.gnu.org/licenses/>. 15 */ 16 17 #ifndef __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__ 18 #define __JELLYFISH_RECTANGULAR_BINARY_MATRIX_HPP__ 19 20 #include <stdint.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <assert.h> 24 #include <time.h> 25 #include <jellyfish/misc.hpp> 26 #include <iostream> 27 #include <exception> 28 #include <stdexcept> 29 #include <vector> 30 #include <limits> 31 32 #ifdef HAVE_CONFIG_H 33 #include <config.h> 34 #endif 35 36 // Column major representation 37 // 38 // Rectangular matrices on Z/2Z of size _r x _c where 1<=_r<=64 (_c is 39 // not limited) and _r <= _c. I.e., the matrix can be stored as an 40 // array of 64 bit word, each representing a column (the highest 64-_r 41 // bits of each word are set to 0). 42 // 43 // Multiplication between a matrix and vector of size _c x 1 gives a 44 // vector of size _r x 1 stored as one 64 bit word. 45 46 namespace jellyfish { 47 class RectangularBinaryMatrix { 48 public: RectangularBinaryMatrix(unsigned int r,unsigned c)49 RectangularBinaryMatrix(unsigned int r, unsigned c) 50 : _columns(alloc(r, c)), _r(r), _c(c) { } RectangularBinaryMatrix(const RectangularBinaryMatrix & rhs)51 RectangularBinaryMatrix(const RectangularBinaryMatrix &rhs) 52 : _columns(alloc(rhs._r, rhs._c)), _r(rhs._r), _c(rhs._c) { 53 memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c); 54 } RectangularBinaryMatrix(RectangularBinaryMatrix && rhs)55 RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs) : 56 _columns(rhs._columns), _r(rhs._r), _c(rhs._c) { 57 rhs._columns = 0; 58 } 59 // Initialize from raw data. raw must contain at least c words. 60 template<typename T> RectangularBinaryMatrix(const T & raw,unsigned int r,unsigned c)61 RectangularBinaryMatrix(const T &raw, unsigned int r, unsigned c) 62 : _columns(alloc(r, c)), _r(r), _c(c) { 63 for(unsigned int i = 0; i < _c; ++i) 64 _columns[i] = raw[i] & cmask(); 65 } ~RectangularBinaryMatrix()66 ~RectangularBinaryMatrix() { 67 free(_columns); 68 } 69 operator =(const RectangularBinaryMatrix & rhs)70 RectangularBinaryMatrix &operator=(const RectangularBinaryMatrix &rhs) { 71 if(_r != rhs._r || _c != rhs._c) 72 throw std::invalid_argument("RHS matrix dimensions do not match"); 73 memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c); 74 return *this; 75 } operator =(RectangularBinaryMatrix && rhs)76 RectangularBinaryMatrix& operator=(RectangularBinaryMatrix&& rhs) { 77 if(_r != rhs._r || _c != rhs._c) 78 throw std::invalid_argument("RHS matrix dimensions do not match"); 79 std::swap(_columns, rhs._columns); 80 return *this; 81 } 82 operator ==(const RectangularBinaryMatrix & rhs) const83 bool operator==(const RectangularBinaryMatrix &rhs) const { 84 if(_r != rhs._r || _c != rhs._c) 85 return false; 86 return !memcmp(_columns, rhs._columns, sizeof(uint64_t) * _c); 87 } operator !=(const RectangularBinaryMatrix & rhs) const88 bool operator!=(const RectangularBinaryMatrix &rhs) const { 89 return !(*this == rhs); 90 } 91 92 // Get i-th column. No check on range operator [](unsigned int i) const93 const uint64_t & operator[](unsigned int i) const { return _columns[i]; } 94 r() const95 unsigned int r() const { return _r; } c() const96 unsigned int c() const { return _c; } 97 98 // True if every column is zero is_zero() const99 bool is_zero() const { 100 uint64_t *p = _columns; 101 while(*p == 0 && p < _columns + _c) 102 ++p; 103 return (p - _columns) == _c; 104 } 105 106 // Randomize the content of the matrix randomize(uint64_t (* rng)())107 void randomize(uint64_t (*rng)()) { 108 for(unsigned int i = 0; i < _c; ++i) 109 _columns[i] = rng() & cmask(); 110 } 111 //void randomize() { randomize(rng); } 112 113 // Make and check that the matrix the lower right corner of the 114 // identity. 115 void init_low_identity(); 116 bool is_low_identity(); 117 118 // Left matrix vector multiplication. Type T supports the operator 119 // v[i] to return the i-th 64 bit word of v. 120 template<typename T> 121 uint64_t times_loop(const T &v) const; 122 123 124 #ifdef HAVE_SSE 125 // This SSE implementation only works if the number of columns is 126 // even. 127 template<typename T> 128 uint64_t times_sse(const T &v) const; 129 #endif 130 131 #ifdef HAVE_INT128 132 // Implementation using __int128 133 template<typename T> 134 uint64_t times_128(const T& v) const; 135 #endif 136 137 template<typename T> times(const T & v) const138 inline uint64_t times(const T& v) const { 139 #ifdef HAVE_SSE 140 return times_sse(v); 141 #elif HAVE_INT128 142 return times_128(v); 143 #else 144 return times_loop(v); 145 #endif 146 } 147 148 // Return a matrix which is the "pseudo inverse" of this matrix. It 149 // is assumed that there is above this square matrix an identity 150 // block and a zero so as to make the matrix squared. Raise an 151 // exception std::domain_error if the matrix is singular. 152 RectangularBinaryMatrix pseudo_inverse() const; 153 154 // Return the multiplication of this and rhs. As in pseudo_inverse, 155 // the two matrices are viewed as being squared, padded above by the 156 // identity. 157 RectangularBinaryMatrix pseudo_multiplication(const RectangularBinaryMatrix &rhs) const; 158 159 // Initialize the object with a pseudo-invertible matrix and return its pseudo-inverse 160 RectangularBinaryMatrix randomize_pseudo_inverse(uint64_t (*rng)()); randomize_pseudo_inverse()161 RectangularBinaryMatrix randomize_pseudo_inverse() { return randomize_pseudo_inverse(random_bits); } 162 163 // Return the rank of the matrix. The matrix is assumed to be 164 // squared, padded above by the identity. 165 unsigned int pseudo_rank() const; 166 167 // Display matrix 168 void print(std::ostream &os) const; 169 template<typename T> 170 void print_vector(std::ostream &os, const T &v) const; 171 172 // Nb words in vector for multiplication nb_words() const173 uint64_t nb_words() const { return (_c >> 6) + ((_c & 0x3f) != 0); } 174 // Mask of most significant bit in most significant word of a vector 175 // with _c rows. msb() const176 uint64_t msb() const { 177 int shift = _c & 0x3f; 178 if(shift == 0) 179 shift = sizeof(uint64_t) * 8; 180 return (uint64_t)1 << (shift - 1); 181 } 182 183 private: 184 // Store column by column. A column may use one word. By 185 // convention, the "unused" bits (most significant bits) of each 186 // column are set to 0. 187 uint64_t * _columns; 188 const unsigned int _r, _c; 189 190 static uint64_t *alloc(unsigned int r, unsigned int c) __attribute__((malloc)); 191 // Mask for column word (zero msb) cmask() const192 uint64_t cmask() const { return std::numeric_limits<uint64_t>::max() >> (std::numeric_limits<uint64_t>::digits - _r); } 193 // Mask of highest word of a vector with _c rows (Most Significant 194 // Word) msw() const195 uint64_t msw() const { return (msb() << 1) - 1; } 196 // Nb of bits used in highest word of vector with _c rows. nb_msb() const197 uint64_t nb_msb() const { 198 uint64_t nb = _c & 0x3f; 199 return nb ? nb : sizeof(uint64_t) * 8; 200 } 201 // Allow to change the matrix vectors. No check on i. get(unsigned int i)202 uint64_t & get(unsigned int i) { return _columns[i]; } 203 }; 204 205 template<typename T> times_loop(const T & v) const206 uint64_t RectangularBinaryMatrix::times_loop(const T &v) const { 207 uint64_t *p = _columns + _c - 1; 208 uint64_t res = 0, x = 0, j = 0; 209 const uint64_t one = (uint64_t)1; 210 211 for(unsigned int i = 0; i < nb_words(); ++i) { 212 j = sizeof(uint64_t) * 8; 213 x = v[i]; 214 if(i == nb_words() - 1) { 215 x &= msw(); 216 j = nb_msb(); 217 } 218 for( ; j > 7; j -= 8, p -= 8) { 219 res ^= (-(x & one)) & p[0]; x >>= 1; 220 res ^= (-(x & one)) & p[-1]; x >>= 1; 221 res ^= (-(x & one)) & p[-2]; x >>= 1; 222 res ^= (-(x & one)) & p[-3]; x >>= 1; 223 res ^= (-(x & one)) & p[-4]; x >>= 1; 224 res ^= (-(x & one)) & p[-5]; x >>= 1; 225 res ^= (-(x & one)) & p[-6]; x >>= 1; 226 res ^= (-(x & one)) & p[-7]; x >>= 1; 227 } 228 } 229 230 // Finish the loop 231 switch(j) { 232 case 7: res ^= (-(x & one)) & *p--; x >>= 1; 233 case 6: res ^= (-(x & one)) & *p--; x >>= 1; 234 case 5: res ^= (-(x & one)) & *p--; x >>= 1; 235 case 4: res ^= (-(x & one)) & *p--; x >>= 1; 236 case 3: res ^= (-(x & one)) & *p--; x >>= 1; 237 case 2: res ^= (-(x & one)) & *p--; x >>= 1; 238 case 1: res ^= (-(x & one)) & *p; 239 } 240 241 return res; 242 } 243 244 #ifdef HAVE_SSE 245 template<typename T> times_sse(const T & v) const246 uint64_t RectangularBinaryMatrix::times_sse(const T &v) const { 247 #define FFs ((uint64_t)-1) 248 static const uint64_t smear[8] asm("smear") __attribute__ ((aligned(16),used)) = 249 {0, 0, 0, FFs, FFs, 0, FFs, FFs}; 250 typedef uint64_t xmm_t __attribute__((vector_size(16))); 251 252 uint64_t *p = _columns + _c - 8; 253 254 // //#ifdef __ICC 255 // register xmm_t acc; 256 // register xmm_t load; 257 // memset(&acc, '\0', 16); 258 // memset(&load, '\0', 16); 259 // #else 260 #ifdef __clang__ 261 #pragma clang diagnostic push 262 #pragma clang diagnostic ignored "-Wuninitialized" 263 #endif 264 xmm_t acc = acc ^ acc; // Set acc to 0 265 xmm_t load = load ^ load; 266 #ifdef __clang__ 267 #pragma clang diagnostic pop 268 #endif 269 // #endif 270 271 // // Zero out acc 272 // #pragma GCC diagnostic push 273 // #pragma GCC diagnostic ignored "-Wuninitialized" 274 // asm("pxor %0,%0\n\t" : "=x"(acc) : "0"(acc)); 275 // asm("pxor %0,%0\n\t" : "=x"(load) : "0"(load)); 276 // #pragma GCC diagnostic pop 277 278 // i is the lower 2 bits of x, and an index into the smear array. Compute res ^= smear[i] & p[j]. 279 #define AND_XOR(off) \ 280 asm("movdqa (%[s],%[i]), %[load]\n\t" \ 281 "pand " off "(%[p]),%[load]\n\t" \ 282 "pxor %[load],%[acc]\n\t" \ 283 : [acc]"=&x"(acc) \ 284 : "[acc]"(acc), [i]"r"(i), [p]"r"(p), [s]"r"(smear), [load]"x"(load)) 285 286 287 uint64_t i, j = 0, x = 0; 288 for(unsigned int w = 0; w < nb_words(); ++w) { 289 x = v[w]; 290 j = sizeof(uint64_t) * 8; 291 if(w == nb_words() - 1) { 292 x &= msw(); 293 j = nb_msb(); 294 } 295 for( ; j > 7; j -= 8, p -= 8) { 296 i = (x & (uint64_t)0x3) << 4; 297 AND_XOR("0x30"); 298 x >>= 2; 299 i = (x & (uint64_t)0x3) << 4; 300 AND_XOR("0x20"); 301 x >>= 2; 302 i = (x & (uint64_t)0x3) << 4; 303 AND_XOR("0x10"); 304 x >>= 2; 305 i = (x & (uint64_t)0x3) << 4; 306 AND_XOR(""); 307 x >>= 2; 308 } 309 } 310 311 // Finish loop 312 p = _columns; 313 switch(j) { 314 case 6: 315 i = (x & (uint64_t)0x3) << 4; 316 AND_XOR("0x20"); 317 x >>= 2; 318 case 4: 319 i = (x & (uint64_t)0x3) << 4; 320 AND_XOR("0x10"); 321 x >>= 2; 322 case 2: 323 i = (x & (uint64_t)0x3) << 4; 324 AND_XOR(""); 325 } 326 327 // Get result out 328 uint64_t res1, res2; 329 asm("movd %[acc], %[res1]\n\t" 330 "psrldq $8, %[acc]\n\t" 331 "movd %[acc], %[res2]\n\t" 332 : [res1]"=r"(res1), [res2]"=r"(res2) 333 : [acc]"x"(acc)); 334 return res1 ^ res2; 335 } 336 #endif // HAVE_SSE 337 338 #ifdef HAVE_INT128 339 template<typename T> times_128(const T & v) const340 uint64_t RectangularBinaryMatrix::times_128(const T &v) const { 341 typedef unsigned __int128 u128; 342 static const u128 smear[4] = 343 { (u128)0, 344 (((u128)1 << 64) - 1) << 64, 345 ((u128)1 << 64) - 1, 346 (u128)-1 347 }; 348 u128* p = (u128*)(_columns + _c - 2); 349 u128 res = 0; 350 // u128 res = res ^ res; 351 352 uint64_t j = 0, x = 0; 353 for(unsigned int w = 0; w < nb_words(); ++w) { 354 x = v[w]; 355 j = sizeof(uint64_t) * 8; 356 if(w == nb_words() - 1) { 357 x &= msw(); 358 j = nb_msb(); 359 } 360 for( ; j > 7; j -= 8, p -= 4) { 361 res ^= smear[x & (uint64_t)0x3] & p[ 0]; x >>= 2; 362 res ^= smear[x & (uint64_t)0x3] & p[-1]; x >>= 2; 363 res ^= smear[x & (uint64_t)0x3] & p[-2]; x >>= 2; 364 res ^= smear[x & (uint64_t)0x3] & p[-3]; x >>= 2; 365 } 366 } 367 368 switch(j) { 369 case 6: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2; 370 case 4: res ^= smear[x & (uint64_t)0x3] & *p--; x >>=2; 371 case 2: res ^= smear[x & (uint64_t)0x3] & *p; 372 } 373 374 return (res ^ (res >> 64)) & smear[2]; 375 } 376 #endif // HAVE_INT128 377 378 } 379 380 #endif 381