1 /**
2 * \file mzd.h
3 * \brief Dense matrices over GF(2) represented as a bit field.
4 *
5 * \author Gregory Bard <bard@fordham.edu>
6 * \author Martin Albrecht <martinralbrecht+m4ri@googlemail.com>
7 * \author Carlo Wood <carlo@alinoe.com>
8 */
9
10 #ifndef M4RI_MZD
11 #define M4RI_MZD
12
13 /*******************************************************************
14 *
15 * M4RI: Linear Algebra over GF(2)
16 *
17 * Copyright (C) 2007, 2008 Gregory Bard <bard@fordham.edu>
18 * Copyright (C) 2008-2013 Martin Albrecht <M.R.Albrecht@rhul.ac.uk>
19 * Copyright (C) 2011 Carlo Wood <carlo@alinoe.com>
20 *
21 * Distributed under the terms of the GNU General Public License (GPL)
22 * version 2 or higher.
23 *
24 * This code is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 * General Public License for more details.
28 *
29 * The full text of the GPL is available at:
30 *
31 * http://www.gnu.org/licenses/
32 *
33 ********************************************************************/
34
35 #ifdef HAVE_CONFIG_H
36 #include "config.h"
37 #endif
38
39 #include <m4ri/m4ri_config.h>
40
41 #include <assert.h>
42 #include <math.h>
43 #include <stdio.h>
44
45 #if __M4RI_HAVE_SSE2
46 #include <emmintrin.h>
47 #endif
48
49 #include <m4ri/misc.h>
50 #include <m4ri/debug_dump.h>
51
52 /**
53 * Maximum number of words allocated for one mzd_t block.
54 *
55 * \note This value must fit in an int, even though it's type is size_t.
56 */
57
58 #define __M4RI_MAX_MZD_BLOCKSIZE (((size_t)1) << 27)
59
60 /**
61 * \brief Matrix multiplication block-ing dimension.
62 *
63 * Defines the number of rows of the matrix A that are
64 * processed as one block during the execution of a multiplication
65 * algorithm.
66 */
67
68 #define __M4RI_MUL_BLOCKSIZE MIN(((int)sqrt((double)(4 * __M4RI_CPU_L3_CACHE))) / 2, 2048)
69
70 /**
71 * \brief Data containers containing the values packed into words
72 */
73
74 typedef struct {
75 size_t size; /*!< number of words */
76 word *begin; /*!< first word */
77 word *end; /*!< last word */
78 } mzd_block_t;
79
80 /**
81 * \brief Dense matrices over GF(2).
82 *
83 * The most fundamental data type in this library.
84 */
85
86 typedef struct mzd_t {
87
88 rci_t nrows; /*!< Number of rows. */
89 rci_t ncols; /*!< Number of columns. */
90 wi_t width; /*!< Number of words with valid bits: width = ceil(ncols / m4ri_radix) */
91
92 /**
93 * Offset in words between rows.
94 *
95 * rowstride = (width < mzd_paddingwidth || (width & 1) == 0) ? width : width + 1;
96 * where width is the width of the underlying non-windowed matrix.
97 */
98
99 wi_t rowstride;
100
101 /**
102 * Offset in words from start of block to first word.
103 *
104 * rows[0] = blocks[0].begin + offset_vector;
105 * This, together with rowstride, makes the rows array obsolete.
106 */
107
108 wi_t offset_vector;
109
110 wi_t row_offset; /*!< Number of rows to the first row counting from the start of the first block. */
111
112 /**
113 * Booleans to speed up things.
114 *
115 * The bits have the following meaning:
116 *
117 * 1: Has non-zero excess.
118 * 2: Is windowed, but has zero offset.
119 * 3: Is windowed, but has zero excess.
120 * 4: Is windowed, but owns the blocks allocations.
121 * 5: Spans more than 1 block.
122 */
123
124 uint8_t flags;
125
126 /**
127 * blockrows_log = log2(blockrows);
128 * where blockrows is the number of rows in one block, which is a power of 2.
129 */
130
131 uint8_t blockrows_log;
132
133 /* ensures sizeof(mzd_t) == 64 */
134 uint8_t padding[62 - 2 * sizeof(rci_t) - 4 * sizeof(wi_t) - sizeof(word) - 2 * sizeof(void *)];
135
136 word high_bitmask; /*!< Mask for valid bits in the word with the highest index (width - 1). */
137 mzd_block_t *blocks; /*!< Pointers to the actual blocks of memory containing the values packed into words. */
138 word **rows; /*!< Address of first word in each row, so the first word of row i is is m->rows[i] */
139 } mzd_t;
140
141 /**
142 * \brief The minimum width where padding occurs.
143 */
144 static wi_t const mzd_paddingwidth = 1;
145
146 /**
147 * \brief flag when ncols%64 == 0
148 */
149
150 static uint8_t const mzd_flag_nonzero_excess = 0x2;
151
152 /**
153 * \brief flag for windowed matrix
154 */
155
156 static uint8_t const mzd_flag_windowed_zerooffset = 0x4;
157
158 /**
159 * \brief flag for windowed matrix where ncols%64 == 0
160 */
161
162 static uint8_t const mzd_flag_windowed_zeroexcess = 0x8;
163
164 /**
165 * \brief flag for windowed matrix wich owns its memory
166 */
167
168 static uint8_t const mzd_flag_windowed_ownsblocks = 0x10;
169
170 /**
171 * \brief flag for multiply blocks
172 */
173
174 static uint8_t const mzd_flag_multiple_blocks = 0x20;
175
176 /**
177 * \brief Test if a matrix is windowed.
178 *
179 * \param M Matrix
180 *
181 * \return a non-zero value if the matrix is windowed, otherwise return zero.
182 */
mzd_is_windowed(mzd_t const * M)183 static inline int mzd_is_windowed(mzd_t const *M) { return M->flags & (mzd_flag_windowed_zerooffset); }
184
185 /**
186 * \brief Test if this mzd_t should free blocks.
187 *
188 * \param M Matrix
189 *
190 * \return TRUE iff blocks is non-zero and should be freed upon a call to mzd_free.
191 */
mzd_owns_blocks(mzd_t const * M)192 static inline int mzd_owns_blocks(mzd_t const *M) {
193 return M->blocks && (!mzd_is_windowed(M) || ((M->flags & mzd_flag_windowed_ownsblocks)));
194 }
195
196 /**
197 * \brief Get a pointer the first word.
198 *
199 * \param M Matrix
200 *
201 * \return a pointer to the first word of the first row.
202 */
203
mzd_first_row(mzd_t const * M)204 static inline word *mzd_first_row(mzd_t const *M) {
205 word *result = M->blocks[0].begin + M->offset_vector;
206 assert(M->nrows == 0 || result == M->rows[0]);
207 return result;
208 }
209
210 /**
211 * \brief Get a pointer to the first word in block n.
212 *
213 * Use mzd_first_row for block number 0.
214 *
215 * \param M Matrix
216 * \param n The block number. Must be larger than 0.
217 *
218 * \return a pointer to the first word of the first row in block n.
219 */
mzd_first_row_next_block(mzd_t const * M,int n)220 static inline word *mzd_first_row_next_block(mzd_t const *M, int n) {
221 assert(n > 0);
222 return M->blocks[n].begin + M->offset_vector - M->row_offset * M->rowstride;
223 }
224
225 /**
226 * \brief Convert row to blocks index.
227 *
228 * \param M Matrix.
229 * \param row The row to convert.
230 *
231 * \return the block number that contains this row.
232 */
233
mzd_row_to_block(mzd_t const * M,rci_t row)234 static inline int mzd_row_to_block(mzd_t const *M, rci_t row) { return (M->row_offset + row) >> M->blockrows_log; }
235
236 /**
237 * \brief Total number of rows in this block.
238 *
239 * Should be called with a constant n=0, or with
240 * n > 0 when n is a variable, for optimization
241 * reasons.
242 *
243 * \param M Matrix
244 * \param n The block number.
245 *
246 * \return the total number of rows in this block.
247 */
248
mzd_rows_in_block(mzd_t const * M,int n)249 static inline wi_t mzd_rows_in_block(mzd_t const *M, int n) {
250 if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
251 if (__M4RI_UNLIKELY(n == 0)) {
252 return (1 << M->blockrows_log) - M->row_offset;
253 } else {
254 int const last_block = mzd_row_to_block(M, M->nrows - 1);
255 if (n < last_block) {
256 return (1 << M->blockrows_log);
257 }
258 return M->nrows + M->row_offset - (n << M->blockrows_log);
259 }
260 }
261 return n ? 0 : M->nrows;
262 }
263
264 /**
265 * \brief Number of rows in this block including r
266 *
267 * \param M Matrix
268 * \param r row
269 *
270 * \return the number of rows with index >= r in this block
271 */
272
mzd_remaining_rows_in_block(mzd_t const * M,rci_t r)273 static inline wi_t mzd_remaining_rows_in_block(mzd_t const *M, rci_t r) {
274 const int n = mzd_row_to_block(M, r);
275 r = (r - (n << M->blockrows_log));
276 if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
277 if (__M4RI_UNLIKELY(n == 0)) {
278 return (1 << M->blockrows_log) - M->row_offset - r;
279 } else {
280 int const last_block = mzd_row_to_block(M, M->nrows - 1);
281 if (n < last_block) {
282 return (1 << M->blockrows_log) - r;
283 }
284 return M->nrows + M->row_offset - (n << M->blockrows_log) - r;
285 }
286 }
287 return n ? 0 : M->nrows - r;
288 }
289
290 /**
291 * \brief Get pointer to first word of row.
292 *
293 * \param M Matrix
294 * \param row The row index.
295 *
296 * \return pointer to first word of the row.
297 */
298
mzd_row(mzd_t const * M,rci_t row)299 static inline word *mzd_row(mzd_t const *M, rci_t row) {
300 wi_t big_vector = M->offset_vector + row * M->rowstride;
301 word *result = M->blocks[0].begin + big_vector;
302 if (__M4RI_UNLIKELY(M->flags & mzd_flag_multiple_blocks)) {
303 int const n = (M->row_offset + row) >> M->blockrows_log;
304 result = M->blocks[n].begin + big_vector - n * (M->blocks[0].size / sizeof(word));
305 }
306 assert(result == M->rows[row]);
307 return result;
308 }
309
310 /**
311 * \brief Create a new matrix of dimension r x c.
312 *
313 * Use mzd_free to kill it.
314 *
315 * \param r Number of rows
316 * \param c Number of columns
317 *
318 */
319
320 mzd_t *mzd_init(rci_t const r, rci_t const c);
321
322 /**
323 * \brief Free a matrix created with mzd_init.
324 *
325 * \param A Matrix
326 */
327
328 void mzd_free(mzd_t *A);
329
330 /**
331 * \brief Create a window/view into the matrix M.
332 *
333 * A matrix window for M is a meta structure on the matrix M. It is
334 * setup to point into the matrix so M \em must \em not be freed while the
335 * matrix window is used.
336 *
337 * This function puts the restriction on the provided parameters that
338 * all parameters must be within range for M which is not enforced
339 * currently .
340 *
341 * Use mzd_free_window to free the window.
342 *
343 * \param M Matrix
344 * \param lowr Starting row (inclusive)
345 * \param lowc Starting column (inclusive, must be multiple of m4ri_radix)
346 * \param highr End row (exclusive)
347 * \param highc End column (exclusive)
348 *
349 */
350
351 mzd_t *mzd_init_window(mzd_t *M, rci_t const lowr, rci_t const lowc, rci_t const highr, rci_t const highc);
352
353 /**
354 * \brief Create a const window/view into a const matrix M.
355 *
356 * See mzd_init_window, but for constant M.
357 */
358
mzd_init_window_const(mzd_t const * M,rci_t const lowr,rci_t const lowc,rci_t const highr,rci_t const highc)359 static inline mzd_t const *mzd_init_window_const(mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr,
360 rci_t const highc) {
361 return mzd_init_window((mzd_t *)M, lowr, lowc, highr, highc);
362 }
363
364 /**
365 * \brief Free a matrix window created with mzd_init_window.
366 *
367 * \param A Matrix
368 */
369
370 #define mzd_free_window mzd_free
371
372 /**
373 * \brief Swap the two rows rowa and rowb starting at startblock.
374 *
375 * \param M Matrix with a zero offset.
376 * \param rowa Row index.
377 * \param rowb Row index.
378 * \param startblock Start swapping only in this block.
379 */
380
_mzd_row_swap(mzd_t * M,rci_t const rowa,rci_t const rowb,wi_t const startblock)381 static inline void _mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb, wi_t const startblock) {
382 if ((rowa == rowb) || (startblock >= M->width)) {
383 return;
384 }
385
386 wi_t width = M->width - startblock - 1;
387 word *a = M->rows[rowa] + startblock;
388 word *b = M->rows[rowb] + startblock;
389 word tmp;
390 word const mask_end = M->high_bitmask;
391
392 for (wi_t i = 0; i < width; ++i) {
393 tmp = a[i];
394 a[i] = b[i];
395 b[i] = tmp;
396 }
397 tmp = (a[width] ^ b[width]) & mask_end;
398 a[width] ^= tmp;
399 b[width] ^= tmp;
400
401 __M4RI_DD_ROW(M, rowa);
402 __M4RI_DD_ROW(M, rowb);
403 }
404
405 /**
406 * \brief Swap the two rows rowa and rowb.
407 *
408 * \param M Matrix
409 * \param rowa Row index.
410 * \param rowb Row index.
411 */
412
mzd_row_swap(mzd_t * M,rci_t const rowa,rci_t const rowb)413 static inline void mzd_row_swap(mzd_t *M, rci_t const rowa, rci_t const rowb) { _mzd_row_swap(M, rowa, rowb, 0); }
414
415 /**
416 * \brief copy row j from A to row i from B.
417 *
418 * The offsets of A and B must match and the number of columns of A
419 * must be less than or equal to the number of columns of B.
420 *
421 * \param B Target matrix.
422 * \param i Target row index.
423 * \param A Source matrix.
424 * \param j Source row index.
425 */
426
427 void mzd_copy_row(mzd_t *B, rci_t i, mzd_t const *A, rci_t j);
428
429 /**
430 * \brief Swap the two columns cola and colb.
431 *
432 * \param M Matrix.
433 * \param cola Column index.
434 * \param colb Column index.
435 */
436
437 void mzd_col_swap(mzd_t *M, rci_t const cola, rci_t const colb);
438
439 /**
440 * \brief Swap the two columns cola and colb but only between start_row and stop_row.
441 *
442 * \param M Matrix.
443 * \param cola Column index.
444 * \param colb Column index.
445 * \param start_row Row index.
446 * \param stop_row Row index (exclusive).
447 */
448
mzd_col_swap_in_rows(mzd_t * M,rci_t const cola,rci_t const colb,rci_t const start_row,rci_t const stop_row)449 static inline void mzd_col_swap_in_rows(mzd_t *M, rci_t const cola, rci_t const colb, rci_t const start_row,
450 rci_t const stop_row) {
451 if (cola == colb) {
452 return;
453 }
454
455 rci_t const _cola = cola;
456 rci_t const _colb = colb;
457
458 wi_t const a_word = _cola / m4ri_radix;
459 wi_t const b_word = _colb / m4ri_radix;
460
461 int const a_bit = _cola % m4ri_radix;
462 int const b_bit = _colb % m4ri_radix;
463
464 word *RESTRICT ptr = mzd_row(M, start_row);
465 int max_bit = MAX(a_bit, b_bit);
466 int count_remaining = stop_row - start_row;
467 int min_bit = a_bit + b_bit - max_bit;
468 int block = mzd_row_to_block(M, start_row);
469 int offset = max_bit - min_bit;
470 word mask = m4ri_one << min_bit;
471 int count = MIN(mzd_remaining_rows_in_block(M, start_row), count_remaining);
472
473 // Apparently we're calling with start_row == stop_row sometimes (seems a bug to me).
474 if (count <= 0) {
475 return;
476 }
477
478 if (a_word == b_word) {
479 while (1) {
480 count_remaining -= count;
481 ptr += a_word;
482 int fast_count = count / 4;
483 int rest_count = count - 4 * fast_count;
484 word xor_v[4];
485 wi_t const rowstride = M->rowstride;
486 while (fast_count--) {
487 xor_v[0] = ptr[0];
488 xor_v[1] = ptr[rowstride];
489 xor_v[2] = ptr[2 * rowstride];
490 xor_v[3] = ptr[3 * rowstride];
491 xor_v[0] ^= xor_v[0] >> offset;
492 xor_v[1] ^= xor_v[1] >> offset;
493 xor_v[2] ^= xor_v[2] >> offset;
494 xor_v[3] ^= xor_v[3] >> offset;
495 xor_v[0] &= mask;
496 xor_v[1] &= mask;
497 xor_v[2] &= mask;
498 xor_v[3] &= mask;
499 xor_v[0] |= xor_v[0] << offset;
500 xor_v[1] |= xor_v[1] << offset;
501 xor_v[2] |= xor_v[2] << offset;
502 xor_v[3] |= xor_v[3] << offset;
503 ptr[0] ^= xor_v[0];
504 ptr[rowstride] ^= xor_v[1];
505 ptr[2 * rowstride] ^= xor_v[2];
506 ptr[3 * rowstride] ^= xor_v[3];
507 ptr += 4 * rowstride;
508 }
509 while (rest_count--) {
510 word xor_v = *ptr;
511 xor_v ^= xor_v >> offset;
512 xor_v &= mask;
513 *ptr ^= xor_v | (xor_v << offset);
514 ptr += rowstride;
515 }
516 block++;
517 if ((count = MIN(mzd_rows_in_block(M, block), count_remaining)) <= 0) {
518 break;
519 }
520 ptr = mzd_first_row_next_block(M, block);
521 }
522 } else {
523 word *RESTRICT min_ptr;
524 wi_t max_offset;
525 if (min_bit == a_bit) {
526 min_ptr = ptr + a_word;
527 max_offset = b_word - a_word;
528 } else {
529 min_ptr = ptr + b_word;
530 max_offset = a_word - b_word;
531 }
532 while (1) {
533 count_remaining -= count;
534 wi_t const rowstride = M->rowstride;
535 while (count--) {
536 word xor_v = (min_ptr[0] ^ (min_ptr[max_offset] >> offset)) & mask;
537 min_ptr[0] ^= xor_v;
538 min_ptr[max_offset] ^= xor_v << offset;
539 min_ptr += rowstride;
540 }
541 block++;
542 if ((count = MIN(mzd_rows_in_block(M, +block), count_remaining)) <= 0) {
543 break;
544 }
545 ptr = mzd_first_row_next_block(M, block);
546 if (min_bit == a_bit) {
547 min_ptr = ptr + a_word;
548 } else {
549 min_ptr = ptr + b_word;
550 }
551 }
552 }
553
554 __M4RI_DD_MZD(M);
555 }
556
557 /**
558 * \brief Read the bit at position M[row,col].
559 *
560 * \param M Matrix
561 * \param row Row index
562 * \param col Column index
563 *
564 * \note No bounds checks whatsoever are performed.
565 *
566 */
567
mzd_read_bit(mzd_t const * M,rci_t const row,rci_t const col)568 static inline BIT mzd_read_bit(mzd_t const *M, rci_t const row, rci_t const col) {
569 return __M4RI_GET_BIT(M->rows[row][col / m4ri_radix], col % m4ri_radix);
570 }
571
572 /**
573 * \brief Write the bit value to position M[row,col]
574 *
575 * \param M Matrix
576 * \param row Row index
577 * \param col Column index
578 * \param value Either 0 or 1
579 *
580 * \note No bounds checks whatsoever are performed.
581 *
582 */
583
mzd_write_bit(mzd_t * M,rci_t const row,rci_t const col,BIT const value)584 static inline void mzd_write_bit(mzd_t *M, rci_t const row, rci_t const col, BIT const value) {
585 __M4RI_WRITE_BIT(M->rows[row][col / m4ri_radix], col % m4ri_radix, value);
586 }
587
588 /**
589 * \brief XOR n bits from values to M starting a position (x,y).
590 *
591 * \param M Source matrix.
592 * \param x Starting row.
593 * \param y Starting column.
594 * \param n Number of bits (<= m4ri_radix);
595 * \param values Word with values;
596 */
597
mzd_xor_bits(mzd_t const * M,rci_t const x,rci_t const y,int const n,word values)598 static inline void mzd_xor_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n, word values) {
599 int const spot = y % m4ri_radix;
600 wi_t const block = y / m4ri_radix;
601 M->rows[x][block] ^= values << spot;
602 int const space = m4ri_radix - spot;
603 if (n > space) {
604 M->rows[x][block + 1] ^= values >> space;
605 }
606 }
607
608 /**
609 * \brief AND n bits from values to M starting a position (x,y).
610 *
611 * \param M Source matrix.
612 * \param x Starting row.
613 * \param y Starting column.
614 * \param n Number of bits (<= m4ri_radix);
615 * \param values Word with values;
616 */
617
mzd_and_bits(mzd_t const * M,rci_t const x,rci_t const y,int const n,word values)618 static inline void mzd_and_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n, word values) {
619 /* This is the best way, since this will drop out once we inverse the bits in values: */
620 values >>= (m4ri_radix - n); /* Move the bits to the lowest columns */
621
622 int const spot = y % m4ri_radix;
623 wi_t const block = y / m4ri_radix;
624 M->rows[x][block] &= values << spot;
625 int const space = m4ri_radix - spot;
626 if (n > space) {
627 M->rows[x][block + 1] &= values >> space;
628 }
629 }
630
631 /**
632 * \brief Clear n bits in M starting a position (x,y).
633 *
634 * \param M Source matrix.
635 * \param x Starting row.
636 * \param y Starting column.
637 * \param n Number of bits (0 < n <= m4ri_radix);
638 */
639
mzd_clear_bits(mzd_t const * M,rci_t const x,rci_t const y,int const n)640 static inline void mzd_clear_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n) {
641 assert(n > 0 && n <= m4ri_radix);
642 word values = m4ri_ffff >> (m4ri_radix - n);
643 int const spot = y % m4ri_radix;
644 wi_t const block = y / m4ri_radix;
645 M->rows[x][block] &= ~(values << spot);
646 int const space = m4ri_radix - spot;
647 if (n > space) {
648 M->rows[x][block + 1] &= ~(values >> space);
649 }
650 }
651
652 /**
653 * \brief Add the rows sourcerow and destrow and stores the total in the row
654 * destrow, but only begins at the column coloffset.
655 *
656 * \param M Matrix
657 * \param dstrow Index of target row
658 * \param srcrow Index of source row
659 * \param coloffset Start column (0 <= coloffset < M->ncols)
660 *
661 * \warning This function expects that there is at least one word worth of work.
662 */
663
mzd_row_add_offset(mzd_t * M,rci_t dstrow,rci_t srcrow,rci_t coloffset)664 static inline void mzd_row_add_offset(mzd_t *M, rci_t dstrow, rci_t srcrow, rci_t coloffset) {
665 assert(dstrow < M->nrows && srcrow < M->nrows && coloffset < M->ncols);
666 wi_t const startblock = coloffset / m4ri_radix;
667 wi_t wide = M->width - startblock;
668 word *src = M->rows[srcrow] + startblock;
669 word *dst = M->rows[dstrow] + startblock;
670 word const mask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - coloffset % m4ri_radix);
671 word const mask_end = M->high_bitmask;
672
673 *dst++ ^= *src++ & mask_begin;
674 --wide;
675
676 #if __M4RI_HAVE_SSE2
677 int not_aligned = __M4RI_ALIGNMENT(src, 16) != 0; /* 0: Aligned, 1: Not aligned */
678 if (wide > not_aligned + 1) /* Speed up for small matrices */
679 {
680 if (not_aligned) {
681 *dst++ ^= *src++;
682 --wide;
683 }
684 /* Now wide > 1 */
685 __m128i *__src = (__m128i *)src;
686 __m128i *__dst = (__m128i *)dst;
687 __m128i *const eof = (__m128i *)((unsigned long)(src + wide) & ~0xFUL);
688 do {
689 __m128i xmm1 = _mm_xor_si128(*__dst, *__src);
690 *__dst++ = xmm1;
691 } while (++__src < eof);
692 src = (word *)__src;
693 dst = (word *)__dst;
694 wide = ((sizeof(word) * wide) % 16) / sizeof(word);
695 }
696 #endif
697 wi_t i = -1;
698 while (++i < wide) {
699 dst[i] ^= src[i];
700 }
701 /*
702 * Revert possibly non-zero excess bits.
703 * Note that i == wide here, and wide can be 0.
704 * But really, src[wide - 1] is M->rows[srcrow][M->width - 1] ;)
705 * We use i - 1 here to let the compiler know these are the same addresses
706 * that we last accessed, in the previous loop.
707 */
708 dst[i - 1] ^= src[i - 1] & ~mask_end;
709
710 __M4RI_DD_ROW(M, dstrow);
711 }
712
713 /**
714 * \brief Add the rows sourcerow and destrow and stores the total in
715 * the row destrow.
716 *
717 * \param M Matrix
718 * \param sourcerow Index of source row
719 * \param destrow Index of target row
720 *
721 * \note this can be done much faster with mzd_combine.
722 */
723
724 void mzd_row_add(mzd_t *M, rci_t const sourcerow, rci_t const destrow);
725
726 /**
727 * \brief Transpose a matrix.
728 *
729 * This function uses the fact that:
730 \verbatim
731 [ A B ]T [AT CT]
732 [ C D ] = [BT DT]
733 \endverbatim
734 * and thus rearranges the blocks recursively.
735 *
736 * \param DST Preallocated return matrix, may be NULL for automatic creation.
737 * \param A Matrix
738 */
739
740 mzd_t *mzd_transpose(mzd_t *DST, mzd_t const *A);
741
742 /**
743 * \brief Naive cubic matrix multiplication.
744 *
745 * That is, compute C such that C == AB.
746 *
747 * \param C Preallocated product matrix, may be NULL for automatic creation.
748 * \param A Input matrix A.
749 * \param B Input matrix B.
750 *
751 * \note Normally, if you will multiply several times by b, it is
752 * smarter to calculate bT yourself, and keep it, and then use the
753 * function called _mzd_mul_naive
754 *
755 */
756 mzd_t *mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B);
757
758 /**
759 * \brief Naive cubic matrix multiplication and addition
760 *
761 * That is, compute C such that C == C + AB.
762 *
763 * \param C Preallocated product matrix.
764 * \param A Input matrix A.
765 * \param B Input matrix B.
766 *
767 * \note Normally, if you will multiply several times by b, it is
768 * smarter to calculate bT yourself, and keep it, and then use the
769 * function called _mzd_mul_naive
770 */
771
772 mzd_t *mzd_addmul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B);
773
774 /**
775 * \brief Naive cubic matrix multiplication with the pre-transposed B.
776 *
777 * That is, compute C such that C == AB^t.
778 *
779 * \param C Preallocated product matrix.
780 * \param A Input matrix A.
781 * \param B Pre-transposed input matrix B.
782 * \param clear Whether to clear C before accumulating AB
783 */
784
785 mzd_t *_mzd_mul_naive(mzd_t *C, mzd_t const *A, mzd_t const *B, int const clear);
786
787 /**
788 * \brief Matrix multiplication optimized for v*A where v is a vector.
789 *
790 * \param C Preallocated product matrix.
791 * \param v Input matrix v.
792 * \param A Input matrix A.
793 * \param clear If set clear C first, otherwise add result to C.
794 *
795 */
796 mzd_t *_mzd_mul_va(mzd_t *C, mzd_t const *v, mzd_t const *A, int const clear);
797
798 /**
799 * \brief Fill matrix M with uniformly distributed bits.
800 *
801 * \param M Matrix
802 */
803
804 void mzd_randomize(mzd_t *M);
805
806 /**
807 * \brief Random callback that produces uniformly distributed random
808 * words on every call.
809 *
810 * \param data callback data
811 *
812 * \return uniformly distributed random word
813 */
814 typedef word (*m4ri_random_callback)(void* data);
815
816 /**
817 * \brief Fill matrix M with uniformly distributed bits.
818 *
819 * \param M Matrix
820 * \param rc callback
821 * \param data callback data passed to every call to rc
822 */
823 void mzd_randomize_custom(mzd_t *M, m4ri_random_callback rc, void* data);
824
825 /**
826 * \brief Set the matrix M to the value equivalent to the integer
827 * value provided.
828 *
829 * Specifically, this function does nothing if value%2 == 0 and
830 * returns the identity matrix if value%2 == 1.
831 *
832 * If the matrix is not square then the largest possible square
833 * submatrix is set to the identity matrix.
834 *
835 * \param M Matrix
836 * \param value Either 0 or 1
837 */
838
839 void mzd_set_ui(mzd_t *M, unsigned int const value);
840
841 /**
842 * \brief Gaussian elimination.
843 *
844 * This will do Gaussian elimination on the matrix m but will start
845 * not at column 0 necc but at column startcol. If full=FALSE, then it
846 * will do triangular style elimination, and if full=TRUE, it will do
847 * Gauss-Jordan style, or full elimination.
848 *
849 * \param M Matrix
850 * \param startcol First column to consider for reduction.
851 * \param full Gauss-Jordan style or upper triangular form only.
852 */
853
854 rci_t mzd_gauss_delayed(mzd_t *M, rci_t const startcol, int const full);
855
856 /**
857 * \brief Gaussian elimination.
858 *
859 * This will do Gaussian elimination on the matrix m. If full=FALSE,
860 * then it will do triangular style elimination, and if full=TRUE,
861 * it will do Gauss-Jordan style, or full elimination.
862 *
863 * \param M Matrix
864 * \param full Gauss-Jordan style or upper triangular form only.
865 *
866 * \sa mzd_echelonize_m4ri(), mzd_echelonize_pluq()
867 */
868
869 rci_t mzd_echelonize_naive(mzd_t *M, int const full);
870
871 /**
872 * \brief Return TRUE if A == B.
873 *
874 * \param A Matrix
875 * \param B Matrix
876 */
877
878 int mzd_equal(mzd_t const *A, mzd_t const *B);
879
880 /**
881 * \brief Return -1,0,1 if if A < B, A == B or A > B respectively.
882 *
883 * \param A Matrix.
884 * \param B Matrix.
885 *
886 * \note This comparison is not well defined mathematically and
887 * relatively arbitrary since elements of GF(2) don't have an
888 * ordering.
889 */
890
891 int mzd_cmp(mzd_t const *A, mzd_t const *B);
892
893 /**
894 * \brief Copy matrix A to DST.
895 *
896 * \param DST May be NULL for automatic creation.
897 * \param A Source matrix.
898 */
899
900 mzd_t *mzd_copy(mzd_t *DST, mzd_t const *A);
901
902 /**
903 * \brief Concatenate B to A and write the result to C.
904 *
905 * That is,
906 *
907 \verbatim
908 [ A ], [ B ] -> [ A B ] = C
909 \endverbatim
910 *
911 * The inputs are not modified but a new matrix is created.
912 *
913 * \param C Matrix, may be NULL for automatic creation
914 * \param A Matrix
915 * \param B Matrix
916 *
917 * \note This is sometimes called augment.
918 */
919
920 mzd_t *mzd_concat(mzd_t *C, mzd_t const *A, mzd_t const *B);
921
922 /**
923 * \brief Stack A on top of B and write the result to C.
924 *
925 * That is,
926 *
927 \verbatim
928 [ A ], [ B ] -> [ A ] = C
929 [ B ]
930 \endverbatim
931 *
932 * The inputs are not modified but a new matrix is created.
933 *
934 * \param C Matrix, may be NULL for automatic creation
935 * \param A Matrix
936 * \param B Matrix
937 */
938
939 mzd_t *mzd_stack(mzd_t *C, mzd_t const *A, mzd_t const *B);
940
941 /**
942 * \brief Copy a submatrix.
943 *
944 * Note that the upper bounds are not included.
945 *
946 * \param S Preallocated space for submatrix, may be NULL for automatic creation.
947 * \param M Matrix
948 * \param lowr start rows
949 * \param lowc start column
950 * \param highr stop row (this row is \em not included)
951 * \param highc stop column (this column is \em not included)
952 */
953 mzd_t *mzd_submatrix(mzd_t *S, mzd_t const *M, rci_t const lowr, rci_t const lowc, rci_t const highr,
954 rci_t const highc);
955
956 /**
957 * \brief Invert the matrix target using Gaussian elimination.
958 *
959 * To avoid recomputing the identity matrix over and over again, I may
960 * be passed in as identity parameter.
961 *
962 * \param INV Preallocated space for inversion matrix, may be NULL for automatic creation.
963 * \param A Matrix to be reduced.
964 * \param I Identity matrix.
965 */
966
967 mzd_t *mzd_invert_naive(mzd_t *INV, mzd_t const *A, mzd_t const *I);
968
969 /**
970 * \brief Set C = A+B.
971 *
972 * C is also returned. If C is NULL then a new matrix is created which
973 * must be freed by mzd_free.
974 *
975 * \param C Preallocated sum matrix, may be NULL for automatic creation.
976 * \param A Matrix
977 * \param B Matrix
978 */
979
980 mzd_t *mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B);
981
982 /**
983 * \brief Same as mzd_add but without any checks on the input.
984 *
985 * \param C Preallocated sum matrix, may be NULL for automatic creation.
986 * \param A Matrix
987 * \param B Matrix
988 */
989
990 mzd_t *_mzd_add(mzd_t *C, mzd_t const *A, mzd_t const *B);
991
992 /**
993 * \brief Same as mzd_add.
994 *
995 * \param C Preallocated difference matrix, may be NULL for automatic creation.
996 * \param A Matrix
997 * \param B Matrix
998 */
999
1000 #define mzd_sub mzd_add
1001
1002 /**
1003 * \brief Same as mzd_sub but without any checks on the input.
1004 *
1005 * \param C Preallocated difference matrix, may be NULL for automatic creation.
1006 * \param A Matrix
1007 * \param B Matrix
1008 */
1009
1010 #define _mzd_sub _mzd_add
1011
1012 /**
1013 * Get n bits starting a position (x,y) from the matrix M.
1014 *
1015 * \param M Source matrix.
1016 * \param x Starting row.
1017 * \param y Starting column.
1018 * \param n Number of bits (<= m4ri_radix);
1019 */
1020
mzd_read_bits(mzd_t const * M,rci_t const x,rci_t const y,int const n)1021 static inline word mzd_read_bits(mzd_t const *M, rci_t const x, rci_t const y, int const n) {
1022 int const spot = y % m4ri_radix;
1023 wi_t const block = y / m4ri_radix;
1024 int const spill = spot + n - m4ri_radix;
1025 word temp = (spill <= 0) ? M->rows[x][block] << -spill
1026 : (M->rows[x][block + 1] << (m4ri_radix - spill)) | (M->rows[x][block] >> spill);
1027 return temp >> (m4ri_radix - n);
1028 }
1029
1030 /**
1031 * \brief a_row[a_startblock:] += b_row[b_startblock:] for offset 0
1032 *
1033 * Adds a_row of A, starting with a_startblock to the end, to
1034 * b_row of B, starting with b_startblock to the end. This gets stored
1035 * in A, in a_row, starting with a_startblock.
1036 *
1037 * \param A destination matrix
1038 * \param a_row destination row for matrix C
1039 * \param a_startblock starting block to work on in matrix C
1040 * \param B source matrix
1041 * \param b_row source row for matrix B
1042 * \param b_startblock starting block to work on in matrix B
1043 *
1044 */
1045
mzd_combine_even_in_place(mzd_t * A,rci_t const a_row,wi_t const a_startblock,mzd_t const * B,rci_t const b_row,wi_t const b_startblock)1046 static inline void mzd_combine_even_in_place(mzd_t *A, rci_t const a_row, wi_t const a_startblock,
1047 mzd_t const *B, rci_t const b_row, wi_t const b_startblock) {
1048
1049 wi_t wide = A->width - a_startblock - 1;
1050
1051 word *a = A->rows[a_row] + a_startblock;
1052 word *b = B->rows[b_row] + b_startblock;
1053
1054 #if __M4RI_HAVE_SSE2
1055 if (wide > 2) {
1056 /** check alignments **/
1057 if (__M4RI_ALIGNMENT(a, 16)) {
1058 *a++ ^= *b++;
1059 wide--;
1060 }
1061
1062 if (__M4RI_ALIGNMENT(a, 16) == 0 && __M4RI_ALIGNMENT(b, 16) == 0) {
1063 __m128i *a128 = (__m128i *)a;
1064 __m128i *b128 = (__m128i *)b;
1065 const __m128i *eof = (__m128i *)((unsigned long)(a + wide) & ~0xFUL);
1066
1067 do {
1068 *a128 = _mm_xor_si128(*a128, *b128);
1069 ++b128;
1070 ++a128;
1071 } while (a128 < eof);
1072
1073 a = (word *)a128;
1074 b = (word *)b128;
1075 wide = ((sizeof(word) * wide) % 16) / sizeof(word);
1076 }
1077 }
1078 #endif // __M4RI_HAVE_SSE2
1079
1080 if (wide > 0) {
1081 wi_t n = (wide + 7) / 8;
1082 switch (wide % 8) {
1083 case 0: do { *(a++) ^= *(b++);
1084 case 7: *(a++) ^= *(b++);
1085 case 6: *(a++) ^= *(b++);
1086 case 5: *(a++) ^= *(b++);
1087 case 4: *(a++) ^= *(b++);
1088 case 3: *(a++) ^= *(b++);
1089 case 2: *(a++) ^= *(b++);
1090 case 1: *(a++) ^= *(b++);
1091 } while (--n > 0);
1092 }
1093 }
1094
1095 *a ^= *b & A->high_bitmask;
1096
1097 __M4RI_DD_MZD(A);
1098 }
1099
1100 /**
1101 * \brief c_row[c_startblock:] = a_row[a_startblock:] + b_row[b_startblock:] for offset 0
1102 *
1103 * Adds a_row of A, starting with a_startblock to the end, to
1104 * b_row of B, starting with b_startblock to the end. This gets stored
1105 * in C, in c_row, starting with c_startblock.
1106 *
1107 * \param C destination matrix
1108 * \param c_row destination row for matrix C
1109 * \param c_startblock starting block to work on in matrix C
1110 * \param A source matrix
1111 * \param a_row source row for matrix A
1112 * \param a_startblock starting block to work on in matrix A
1113 * \param B source matrix
1114 * \param b_row source row for matrix B
1115 * \param b_startblock starting block to work on in matrix B
1116 *
1117 */
1118
mzd_combine_even(mzd_t * C,rci_t const c_row,wi_t const c_startblock,mzd_t const * A,rci_t const a_row,wi_t const a_startblock,mzd_t const * B,rci_t const b_row,wi_t const b_startblock)1119 static inline void mzd_combine_even(mzd_t *C, rci_t const c_row, wi_t const c_startblock,
1120 mzd_t const *A, rci_t const a_row, wi_t const a_startblock,
1121 mzd_t const *B, rci_t const b_row, wi_t const b_startblock) {
1122
1123 wi_t wide = A->width - a_startblock - 1;
1124 word *a = A->rows[a_row] + a_startblock;
1125 word *b = B->rows[b_row] + b_startblock;
1126 word *c = C->rows[c_row] + c_startblock;
1127
1128 #if __M4RI_HAVE_SSE2
1129 if (wide > 2) {
1130 /** check alignments **/
1131 if (__M4RI_ALIGNMENT(a, 16)) {
1132 *c++ = *b++ ^ *a++;
1133 wide--;
1134 }
1135
1136 if ((__M4RI_ALIGNMENT(b, 16) | __M4RI_ALIGNMENT(c, 16)) == 0) {
1137 __m128i *a128 = (__m128i *)a;
1138 __m128i *b128 = (__m128i *)b;
1139 __m128i *c128 = (__m128i *)c;
1140 const __m128i *eof = (__m128i *)((unsigned long)(a + wide) & ~0xFUL);
1141
1142 do {
1143 *c128 = _mm_xor_si128(*a128, *b128);
1144 ++c128;
1145 ++b128;
1146 ++a128;
1147 } while (a128 < eof);
1148
1149 a = (word *)a128;
1150 b = (word *)b128;
1151 c = (word *)c128;
1152 wide = ((sizeof(word) * wide) % 16) / sizeof(word);
1153 }
1154 }
1155 #endif // __M4RI_HAVE_SSE2
1156
1157 if (wide > 0) {
1158 wi_t n = (wide + 7) / 8;
1159 switch (wide % 8) {
1160 case 0: do { *(c++) = *(a++) ^ *(b++);
1161 case 7: *(c++) = *(a++) ^ *(b++);
1162 case 6: *(c++) = *(a++) ^ *(b++);
1163 case 5: *(c++) = *(a++) ^ *(b++);
1164 case 4: *(c++) = *(a++) ^ *(b++);
1165 case 3: *(c++) = *(a++) ^ *(b++);
1166 case 2: *(c++) = *(a++) ^ *(b++);
1167 case 1: *(c++) = *(a++) ^ *(b++);
1168 } while (--n > 0);
1169 }
1170 }
1171 *c ^= ((*a ^ *b ^ *c) & C->high_bitmask);
1172
1173 __M4RI_DD_MZD(C);
1174 }
1175
1176 /**
1177 * \brief row3[col3:] = row1[col1:] + row2[col2:]
1178 *
1179 * Adds row1 of SC1, starting with startblock1 to the end, to
1180 * row2 of SC2, starting with startblock2 to the end. This gets stored
1181 * in DST, in row3, starting with startblock3.
1182 *
1183 * \param C destination matrix
1184 * \param c_row destination row for matrix dst
1185 * \param c_startblock starting block to work on in matrix dst
1186 * \param A source matrix
1187 * \param a_row source row for matrix sc1
1188 * \param a_startblock starting block to work on in matrix sc1
1189 * \param B source matrix
1190 * \param b_row source row for matrix sc2
1191 * \param b_startblock starting block to work on in matrix sc2
1192 *
1193 */
mzd_combine(mzd_t * C,rci_t const c_row,wi_t const c_startblock,mzd_t const * A,rci_t const a_row,wi_t const a_startblock,mzd_t const * B,rci_t const b_row,wi_t const b_startblock)1194 static inline void mzd_combine(mzd_t *C, rci_t const c_row, wi_t const c_startblock,
1195 mzd_t const *A, rci_t const a_row, wi_t const a_startblock,
1196 mzd_t const *B, rci_t const b_row, wi_t const b_startblock) {
1197
1198 if ((C == A) & (a_row == c_row) & (a_startblock == c_startblock)) {
1199 mzd_combine_even_in_place(C, c_row, c_startblock, B, b_row, b_startblock);
1200 } else {
1201 mzd_combine_even(C, c_row, c_startblock, A, a_row, a_startblock, B, b_row, b_startblock);
1202 }
1203 return;
1204 }
1205
1206 /**
1207 * \brief Get n bits starting a position (x,y) from the matrix M.
1208 *
1209 * This function is in principle the same as mzd_read_bits,
1210 * but it explicitely returns an 'int' and is used as
1211 * index into an array (Gray code).
1212 */
1213
mzd_read_bits_int(mzd_t const * M,rci_t const x,rci_t const y,int const n)1214 static inline int mzd_read_bits_int(mzd_t const *M, rci_t const x, rci_t const y, int const n) {
1215 return __M4RI_CONVERT_TO_INT(mzd_read_bits(M, x, y, n));
1216 }
1217
1218 /**
1219 * \brief Zero test for matrix.
1220 *
1221 * \param A Input matrix.
1222 *
1223 */
1224 int mzd_is_zero(mzd_t const *A);
1225
1226 /**
1227 * \brief Clear the given row, but only begins at the column coloffset.
1228 *
1229 * \param M Matrix
1230 * \param row Index of row
1231 * \param coloffset Column offset
1232 */
1233
1234 void mzd_row_clear_offset(mzd_t *M, rci_t const row, rci_t const coloffset);
1235
1236 /**
1237 * \brief Find the next nonzero entry in M starting at start_row and start_col.
1238 *
1239 * This function walks down rows in the inner loop and columns in the
1240 * outer loop. If a nonzero entry is found this function returns 1 and
1241 * zero otherwise.
1242 *
1243 * If and only if a nonzero entry is found r and c are updated.
1244 *
1245 * \param M Matrix
1246 * \param start_row Index of row where to start search
1247 * \param start_col Index of column where to start search
1248 * \param r Row index updated if pivot is found
1249 * \param c Column index updated if pivot is found
1250 */
1251
1252 int mzd_find_pivot(mzd_t const *M, rci_t start_row, rci_t start_col, rci_t *r, rci_t *c);
1253
1254 /**
1255 * \brief Return the number of nonzero entries divided by nrows *
1256 * ncols
1257 *
1258 * If res = 0 then 100 samples per row are made, if res > 0 the
1259 * function takes res sized steps within each row (res = 1 uses every
1260 * word).
1261 *
1262 * \param A Matrix
1263 * \param res Resolution of sampling (in words)
1264 */
1265
1266 double mzd_density(mzd_t const *A, wi_t res);
1267
1268 /**
1269 * \brief Return the number of nonzero entries divided by nrows *
1270 * ncols considering only the submatrix starting at (r,c).
1271 *
1272 * If res = 0 then 100 samples per row are made, if res > 0 the
1273 * function takes res sized steps within each row (res = 1 uses every
1274 * word).
1275 *
1276 * \param A Matrix
1277 * \param res Resolution of sampling (in words)
1278 * \param r Row to start counting
1279 * \param c Column to start counting
1280 */
1281
1282 double _mzd_density(mzd_t const *A, wi_t res, rci_t r, rci_t c);
1283
1284 /**
1285 * \brief Return the first row with all zero entries.
1286 *
1287 * If no such row can be found returns nrows.
1288 *
1289 * \param A Matrix
1290 */
1291
1292 rci_t mzd_first_zero_row(mzd_t const *A);
1293
1294 /**
1295 * \brief Return hash value for matrix.
1296 *
1297 * \param A Matrix
1298 */
1299
mzd_hash(mzd_t const * A)1300 static inline word mzd_hash(mzd_t const *A) {
1301 word hash = 0;
1302 for (rci_t r = 0; r < A->nrows; ++r) {
1303 hash ^= rotate_word(calculate_hash(A->rows[r], A->width), r % m4ri_radix);
1304 }
1305 return hash;
1306 }
1307
1308 /**
1309 * Return upper triangular submatrix of A
1310 *
1311 * \param U Output matrix, if NULL a new matrix will be returned
1312 * \param A Source matrix
1313 *
1314 * \return U
1315 */
1316
1317 mzd_t *mzd_extract_u(mzd_t *U, mzd_t const *A);
1318
1319 /**
1320 * Return lower triangular submatrix of A
1321 *
1322 * \param L Output matrix, if NULL a new matrix will be returned
1323 * \param A Source matrix
1324 *
1325 * \return L
1326 */
1327
1328 mzd_t *mzd_extract_l(mzd_t *L, mzd_t const *A);
1329
1330 #endif // M4RI_MZD
1331