1 /**
2  * \file mzed.h
3  *
4  * \brief Dense matrices over \GF2E represented as packed matrices.
5  *
6  * This file implements the data type mzed_t. That is, matrices over
7  * \GF2E in row major representation.
8 
9  * For example, let \f$ a = \sum a_i x_i / <f>\f$ and \f$b = \sum b_i x_i / <f>\f$
10  * be elements in \f$\mathbb{F}_{2^6}\f$ with minimal polynomial \f$f\f$. Then, the
11  * \f$ 1 \times 2\f$ matrix [b a] would be stored as
12 \verbatim
13  [...| 0 0 b5 b4 b3 b2 b1 b0 | 0 0 a5 a4 a3 a2 a1 a0]
14 \endverbatim
15  *
16  * Internally M4RI matrices are used to store bits with allows to
17  * re-use existing M4RI methods (such as mzd_add) when implementing
18  * functions for mzed_t.
19  *
20  * This data type is preferable when Newton-John tables ought be used
21  * or when the matrix is small (\f$ m \times n \times e < L2\f$).
22  *
23  * \author Martin Albrecht <martinralbrecht@googlemail.com>
24  */
25 
26 
27 #ifndef M4RIE_MZED_H
28 #define M4RIE_MZED_H
29 
30 /******************************************************************************
31 *
32 *            M4RIE: Linear Algebra over GF(2^e)
33 *
34 *    Copyright (C) 2010,2011 Martin Albrecht <martinralbrecht@googlemail.com>
35 *
36 *  Distributed under the terms of the GNU General Public License (GEL)
37 *  version 2 or higher.
38 *
39 *    This code is distributed in the hope that it will be useful,
40 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
41 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
42 *    General Public License for more details.
43 *
44 *  The full text of the GPL is available at:
45 *
46 *                  http://www.gnu.org/licenses/
47 ******************************************************************************/
48 
49 #include <m4ri/m4ri.h>
50 #include <m4rie/gf2e.h>
51 #include <m4rie/m4ri_functions.h>
52 
53 /**
54  * \brief Dense matrices over \GF2E represented as packed matrices.
55  *
56  * \ingroup Definitions
57  */
58 
59 typedef struct {
60   mzd_t *x; /**< \f$m \times n\f$ matrices over \GF2E are represented as \f$m \times (en)\f$ matrices over \GF2. */
61   const gf2e *finite_field; /**< A finite field \GF2E. */
62   rci_t nrows; /**< Number of rows. */
63   rci_t ncols; /**< Number of columns. */
64   wi_t w;   /**< The internal width of elements (must divide 64). */
65 } mzed_t;
66 
67 
68 /**
69  * \brief Create a new matrix of dimension m x n over ff
70  *
71  * Use mzed_free() to kill it.
72  *
73  * \param ff Finite field
74  * \param m Number of rows
75  * \param n Number of columns
76  *
77  * \ingroup Constructions
78  */
79 
80 mzed_t *mzed_init(const gf2e *ff, const rci_t m, const rci_t n);
81 
82 /**
83  * \brief Free a matrix created with mzed_init().
84  *
85  * \param A Matrix
86  *
87  * \ingroup Constructions
88  */
89 
90 void mzed_free(mzed_t *A);
91 
92 
93 /**
94  * \brief Concatenate B to A and write the result to C.
95  *
96  * That is,
97  \verbatim
98  [ A ], [ B ] -> [ A  B ] = C
99  \endverbatim
100  * The inputs are not modified but a new matrix is created.
101  *
102  * \param C Matrix, may be NULL for automatic creation
103  * \param A Matrix
104  * \param B Matrix
105  *
106  * \note This is sometimes called augment.
107  *
108  * \ingroup Constructions
109  */
110 
mzed_concat(mzed_t * C,const mzed_t * A,const mzed_t * B)111 static inline mzed_t *mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B) {
112   if(C==NULL)
113     C = mzed_init(A->finite_field, A->nrows, A->ncols + B->ncols);
114   mzd_concat(C->x, A->x, B->x);
115   return C;
116 }
117 
118 /**
119  * \brief Stack A on top of B and write the result to C.
120  *
121  * That is,
122  \verbatim
123  [ A ], [ B ] -> [ A ] = C
124                  [ B ]
125  \endverbatim
126  * The inputs are not modified but a new matrix is created.
127  *
128  * \param C Matrix, may be NULL for automatic creation
129  * \param A Matrix
130  * \param B Matrix
131  *
132  * \ingroup Constructions
133  */
134 
mzed_stack(mzed_t * C,const mzed_t * A,const mzed_t * B)135 static inline mzed_t *mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B) {
136   if(C==NULL)
137     C = mzed_init(A->finite_field, A->nrows + B->nrows, A->ncols);
138   mzd_stack(C->x, A->x, B->x);
139   return C;
140 }
141 
142 
143 /**
144  * \brief Copy a submatrix.
145  *
146  * Note that the upper bounds are not included.
147  *
148  * \param S Preallocated space for submatrix, may be NULL for automatic creation.
149  * \param M Matrix
150  * \param lowr start rows
151  * \param lowc start column
152  * \param highr stop row (this row is \em not included)
153  * \param highc stop column (this column is \em not included)
154  *
155  * \ingroup Constructions
156  */
mzed_submatrix(mzed_t * S,const mzed_t * M,const rci_t lowr,const rci_t lowc,const rci_t highr,const rci_t highc)157 static inline mzed_t *mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
158   if(S==NULL)
159     S = mzed_init(M->finite_field, highr - lowr, highc - lowc);
160 
161   mzd_submatrix(S->x, M->x, lowr, lowc*M->w, highr, highc*M->w);
162   return S;
163 }
164 
165 /**
166  * \brief Create a window/view into the matrix A.
167  *
168  * A matrix window for A is a meta structure on the matrix A. It is
169  * setup to point into the matrix so M \em must \em not be freed while
170  * the matrix window is used.
171  *
172  * This function puts the restriction on the provided parameters that
173  * all parameters must be within range for A which is not currently
174  * enforced.
175  *
176  * Use mzed_free_window() to free the window.
177  *
178  * \param A Matrix
179  * \param lowr Starting row (inclusive)
180  * \param lowc Starting column (inclusive)
181  * \param highr End row (exclusive)
182  * \param highc End column (exclusive)
183  *
184  * \ingroup Constructions
185  */
186 
mzed_init_window(const mzed_t * A,const rci_t lowr,const rci_t lowc,const rci_t highr,const rci_t highc)187 static inline mzed_t *mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
188   mzed_t *B = (mzed_t *)m4ri_mm_malloc(sizeof(mzed_t));
189   B->finite_field = A->finite_field;
190   B->w = gf2e_degree_to_w(A->finite_field);
191   B->nrows = highr - lowr;
192   B->ncols = highc - lowc;
193   B->x = mzd_init_window(A->x, lowr, B->w*lowc, highr, B->w*highc);
194   return B;
195 }
196 
197 /**
198  * \brief Free a matrix window created with mzed_init_window().
199  *
200  * \param A Matrix
201  *
202  * \ingroup Constructions
203  */
204 
mzed_free_window(mzed_t * A)205 static inline void mzed_free_window(mzed_t *A) {
206   mzd_free_window(A->x);
207   m4ri_mm_free(A);
208 }
209 
210 /**
211  * \brief \f$ C = A+B \f$.
212  *
213  * C is also returned. If C is NULL then a new matrix is created which
214  * must be freed by mzed_free().
215  *
216  * \param C Preallocated sum matrix, may be NULL for automatic creation.
217  * \param A Matrix
218  * \param B Matrix
219  *
220  * \ingroup Addition
221  */
222 
223 mzed_t *mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
224 
225 /**
226  * \brief \f$ C = A+B \f$.
227  *
228  * \param C Preallocated sum matrix, may be NULL for automatic creation.
229  * \param A Matrix
230  * \param B Matrix
231  *
232  * \ingroup Addition
233  */
234 
235 mzed_t *_mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
236 
237 /**
238  * \brief \f$ C = A+B \f$.
239  *
240  * \param C Preallocated difference matrix, may be NULL for automatic creation.
241  * \param A Matrix
242  * \param B Matrix
243  *
244  * \ingroup Addition
245  */
246 
247 #define mzed_sub mzed_add
248 
249 /**
250  * \brief \f$ C = A+B \f$.
251  *
252  * \param C Preallocated difference matrix, may be NULL for automatic creation.
253  * \param A Matrix
254  * \param B Matrix
255  *
256  * \ingroup Addition
257  */
258 
259 #define _mzed_sub _mzed_add
260 
261 /**
262  * \brief \f$ C = A \cdot B \f$.
263  *
264  * \param C Preallocated return matrix, may be NULL for automatic creation.
265  * \param A Input matrix A.
266  * \param B Input matrix B.
267  *
268  * \ingroup Multiplication
269  */
270 
271 mzed_t *mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
272 
273 /**
274  * \brief \f$ C = C + A \cdot B \f$.
275  *
276  * \param C Preallocated product matrix, may be NULL for automatic creation.
277  * \param A Input matrix A.
278  * \param B Input matrix B.
279  *
280  * \ingroup Multiplication
281  */
282 
283 mzed_t *mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
284 
285 /**
286  * \brief \f$ C = A \cdot B \f$.
287  *
288  * \param C Preallocated product matrix.
289  * \param A Input matrix A.
290  * \param B Input matrix B.
291  *
292  * \ingroup Multiplication
293  */
294 
295 mzed_t *_mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
296 
297 /**
298  * \brief \f$ C = C + A \cdot B \f$.
299  *
300  * \param C Preallocated product matrix.
301  * \param A Input matrix A.
302  * \param B Input matrix B.
303  *
304  * \ingroup Multiplication
305  */
306 
307 mzed_t *_mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
308 
309 
310 /**
311  * \brief \f$ C = C + A \cdot B \f$ using naive cubic multiplication.
312  *
313  * \param C Preallocated product matrix.
314  * \param A Input matrix A.
315  * \param B Input matrix B.
316  *
317  * \note There is no reason to call this function except for checking
318  * the correctness of other algorithms. It is very slow.
319  *
320  * \ingroup Multiplication
321  */
322 
323 mzed_t *mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
324 
325 /**
326  * \brief \f$ C = A \cdot B \f$ using naive cubic multiplication.
327  *
328  * \param C Preallocated product matrix, may be NULL for automatic creation.
329  * \param A Input matrix A.
330  * \param B Input matrix B.
331  *
332  * \note There is no reason to call this function except for checking
333  * the correctness of other algorithms. It is very slow.
334  *
335  * \ingroup Multiplication
336  */
337 
338 mzed_t *mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
339 
340 /**
341  * \brief \f$ C = C + A \cdot B \f$ using naive cubic multiplication.
342  *
343  * \param C Preallocated product matrix.
344  * \param A Input matrix A.
345  * \param B Input matrix B.
346  *
347  * \ingroup Multiplication
348  */
349 
350 mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
351 
352 /**
353  * \brief \f$ C = a \cdot B \f$.
354  *
355  * \param C Preallocated product matrix or NULL.
356  * \param a finite field element.
357  * \param B Input matrix B.
358  *
359  * \ingroup Multiplication
360  */
361 
362 mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B);
363 
364 /**
365  * Check whether C, A and B match in sizes and fields for
366  * multiplication
367  *
368  * \param C Output matrix, if NULL a new matrix is created.
369  * \param A Input matrix.
370  * \param B Input matrix.
371  * \param clear Write zeros to C or not.
372  */
373 
374 mzed_t *_mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear);
375 
376 /**
377  * \brief Fill matrix A with random elements.
378  *
379  * \param A Matrix
380  *
381  * \todo Allow the user to provide a RNG callback.
382  *
383  * \ingroup Assignment
384  */
385 
386 void mzed_randomize(mzed_t *A);
387 
388 /**
389  * \brief Copy matrix A to B.
390  *
391  * \param B May be NULL for automatic creation.
392  * \param A Source matrix.
393  *
394  * \ingroup Assignment
395  */
396 
397 mzed_t *mzed_copy(mzed_t *B, const mzed_t *A);
398 
399 /**
400  * \brief Return diagonal matrix with value on the diagonal.
401  *
402  * If the matrix is not square then the largest possible square
403  * submatrix is used.
404  *
405  * \param A Matrix
406  * \param value Finite Field element
407  *
408  * \ingroup Assignment
409  */
410 
411 void mzed_set_ui(mzed_t *A, word value);
412 
413 
414 /**
415  * \brief Get the element at position (row,col) from the matrix A.
416  *
417  * \param A Source matrix.
418  * \param row Starting row.
419  * \param col Starting column.
420  *
421  * \ingroup Assignment
422  */
423 
mzed_read_elem(const mzed_t * A,const rci_t row,const rci_t col)424 static inline word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col) {
425   return __mzd_read_bits(A->x, row, A->w*col, A->w);
426 }
427 
428 /**
429  * \brief At the element elem to the element at position (row,col) in the matrix A.
430  *
431  * \param A Target matrix.
432  * \param row Starting row.
433  * \param col Starting column.
434  * \param elem finite field element.
435  *
436  * \ingroup Assignment
437  */
438 
mzed_add_elem(mzed_t * A,const rci_t row,const rci_t col,const word elem)439 static inline void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
440   __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
441 }
442 
443 /**
444  * \brief Write the element elem to the position (row,col) in the matrix A.
445  *
446  * \param A Target matrix.
447  * \param row Starting row.
448  * \param col Starting column.
449  * \param elem finite field element.
450  *
451  * \ingroup Assignment
452  */
453 
mzed_write_elem(mzed_t * A,const rci_t row,const rci_t col,const word elem)454 static inline void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
455   __mzd_clear_bits(A->x, row, A->w*col, A->w);
456   __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
457 }
458 
459 /**
460  * \brief Return -1,0,1 if if A < B, A == B or A > B respectively.
461  *
462  * \param A Matrix.
463  * \param B Matrix.
464  *
465  * \note This comparison is not well defined mathematically and
466  * relatively arbitrary since elements of \GF2E don't have an
467  * ordering.
468  *
469  * \ingroup Comparison
470  */
471 
mzed_cmp(mzed_t * A,mzed_t * B)472 static inline int mzed_cmp(mzed_t *A, mzed_t *B) {
473   return mzd_cmp(A->x,B->x);
474 }
475 
476 
477 /**
478  * \brief Zero test for matrix.
479  *
480  * \param A Input matrix.
481  *
482  * \ingroup Comparison
483  */
mzed_is_zero(const mzed_t * A)484 static inline int mzed_is_zero(const mzed_t *A) {
485   return mzd_is_zero(A->x);
486 }
487 
488 /**
489  * A[ar,c] = A[ar,c] + x*B[br,c] for all c >= startcol.
490  *
491  * \param A Matrix.
492  * \param ar Row index in A.
493  * \param B Matrix.
494  * \param br Row index in B.
495  * \param x Finite field element.
496  * \param start_col Column index.
497  *
498  * \ingroup RowOperations
499  */
500 
501 void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col);
502 
503 /**
504  * A[ar,c] = A[ar,c] + B[br,c] for all c >= startcol.
505  *
506  * \param A Matrix.
507  * \param ar Row index in A.
508  * \param B Matrix.
509  * \param br Row index in B.
510  * \param start_col Column index.
511  *
512  * \ingroup RowOperations
513  */
514 
mzed_add_row(mzed_t * A,rci_t ar,const mzed_t * B,rci_t br,rci_t start_col)515 static inline void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col) {
516   assert(A->ncols == B->ncols && A->finite_field == B->finite_field);
517   assert(start_col < A->ncols);
518 
519   const rci_t start = A->w*start_col;
520   const wi_t startblock = start/m4ri_radix;
521   const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
522   const word bitmask_end = A->x->high_bitmask;
523 
524   word *_a = A->x->rows[ar];
525   const word *_b = B->x->rows[br];
526   wi_t j;
527 
528   if (A->x->width - startblock > 1) {
529     _a[startblock] ^= _b[startblock] & bitmask_begin;
530     for(j=startblock+1; j<A->x->width-1; j++)
531       _a[j] ^= _b[j];
532     _a[j] ^= _b[j] & bitmask_end;
533   } else {
534     _a[startblock] ^= _b[startblock] & (bitmask_begin & bitmask_end);
535   }
536 }
537 
538 /**
539  * \brief Rescale the row r in A by X starting c.
540  *
541  * \param A Matrix
542  * \param r Row index.
543  * \param start_col Column index.
544  * \param x Multiplier
545  *
546  * \ingroup RowOperations
547  */
548 
mzed_rescale_row(mzed_t * A,rci_t r,rci_t start_col,const word x)549 static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word x) {
550   assert(start_col < A->ncols);
551 
552   const gf2e *ff = A->finite_field;
553   const rci_t start = A->w*start_col;
554   const wi_t startblock = start/m4ri_radix;
555   word *_a = A->x->rows[r];
556   const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
557   const word bitmask_end   = A->x->high_bitmask;
558   register word __a = _a[startblock]>>(start%m4ri_radix);
559   register word __t = 0;
560   int j;
561 
562   if(A->w == 2) {
563     switch( (start/2) % 32 ) {
564     case  0:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0;  __a >>= 2;
565     case  1:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2;  __a >>= 2;
566     case  2:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4;  __a >>= 2;
567     case  3:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6;  __a >>= 2;
568     case  4:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8;  __a >>= 2;
569     case  5:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10;  __a >>= 2;
570     case  6:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12;  __a >>= 2;
571     case  7:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14;  __a >>= 2;
572     case  8:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16;  __a >>= 2;
573     case  9:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18;  __a >>= 2;
574     case 10:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20;  __a >>= 2;
575     case 11:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22;  __a >>= 2;
576     case 12:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24;  __a >>= 2;
577     case 13:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26;  __a >>= 2;
578     case 14:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28;  __a >>= 2;
579     case 15:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30;  __a >>= 2;
580     case 16:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32;  __a >>= 2;
581     case 17:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34;  __a >>= 2;
582     case 18:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36;  __a >>= 2;
583     case 19:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38;  __a >>= 2;
584     case 20:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40;  __a >>= 2;
585     case 21:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42;  __a >>= 2;
586     case 22:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44;  __a >>= 2;
587     case 23:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46;  __a >>= 2;
588     case 24:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48;  __a >>= 2;
589     case 25:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50;  __a >>= 2;
590     case 26:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52;  __a >>= 2;
591     case 27:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54;  __a >>= 2;
592     case 28:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56;  __a >>= 2;
593     case 29:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58;  __a >>= 2;
594     case 30:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60;  __a >>= 2;
595     case 31:  __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62;  break;
596     default: m4ri_die("impossible");
597     }
598     if(A->x->width-startblock == 1) {
599       _a[startblock] &= ~(bitmask_begin & bitmask_end);
600       _a[startblock] ^= __t & bitmask_begin & bitmask_end;
601       return;
602     } else {
603       _a[startblock] &= ~bitmask_begin;
604       _a[startblock] ^= __t & bitmask_begin;
605     }
606 
607     for(j=startblock+1; j<A->x->width -1; j++) {
608       __a = _a[j], __t = 0;
609       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0;  __a >>= 2;
610       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2;  __a >>= 2;
611       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4;  __a >>= 2;
612       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6;  __a >>= 2;
613       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8;  __a >>= 2;
614       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10;  __a >>= 2;
615       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12;  __a >>= 2;
616       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14;  __a >>= 2;
617       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16;  __a >>= 2;
618       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18;  __a >>= 2;
619       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20;  __a >>= 2;
620       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22;  __a >>= 2;
621       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24;  __a >>= 2;
622       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26;  __a >>= 2;
623       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28;  __a >>= 2;
624       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30;  __a >>= 2;
625       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32;  __a >>= 2;
626       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34;  __a >>= 2;
627       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36;  __a >>= 2;
628       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38;  __a >>= 2;
629       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40;  __a >>= 2;
630       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42;  __a >>= 2;
631       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44;  __a >>= 2;
632       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46;  __a >>= 2;
633       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48;  __a >>= 2;
634       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50;  __a >>= 2;
635       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52;  __a >>= 2;
636       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54;  __a >>= 2;
637       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56;  __a >>= 2;
638       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58;  __a >>= 2;
639       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60;  __a >>= 2;
640       __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62;
641       _a[j] = __t;
642     }
643 
644     __t = _a[j] & ~bitmask_end;
645     switch(A->x->ncols % m4ri_radix) {
646     case  0: __t ^= ff->mul(ff, x, (_a[j] & 0xC000000000000000ULL)>>62)<<62;
647     case 62: __t ^= ff->mul(ff, x, (_a[j] & 0x3000000000000000ULL)>>60)<<60;
648     case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0C00000000000000ULL)>>58)<<58;
649     case 58: __t ^= ff->mul(ff, x, (_a[j] & 0x0300000000000000ULL)>>56)<<56;
650     case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00C0000000000000ULL)>>54)<<54;
651     case 54: __t ^= ff->mul(ff, x, (_a[j] & 0x0030000000000000ULL)>>52)<<52;
652     case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000C000000000000ULL)>>50)<<50;
653     case 50: __t ^= ff->mul(ff, x, (_a[j] & 0x0003000000000000ULL)>>48)<<48;
654     case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000C00000000000ULL)>>46)<<46;
655     case 46: __t ^= ff->mul(ff, x, (_a[j] & 0x0000300000000000ULL)>>44)<<44;
656     case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000C0000000000ULL)>>42)<<42;
657     case 42: __t ^= ff->mul(ff, x, (_a[j] & 0x0000030000000000ULL)>>40)<<40;
658     case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000C000000000ULL)>>38)<<38;
659     case 38: __t ^= ff->mul(ff, x, (_a[j] & 0x0000003000000000ULL)>>36)<<36;
660     case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000C00000000ULL)>>34)<<34;
661     case 34: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000300000000ULL)>>32)<<32;
662     case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000C0000000ULL)>>30)<<30;
663     case 30: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000030000000ULL)>>28)<<28;
664     case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000C000000ULL)>>26)<<26;
665     case 26: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000003000000ULL)>>24)<<24;
666     case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000C00000ULL)>>22)<<22;
667     case 22: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000300000ULL)>>20)<<20;
668     case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000C0000ULL)>>18)<<18;
669     case 18: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000030000ULL)>>16)<<16;
670     case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000C000ULL)>>14)<<14;
671     case 14: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000003000ULL)>>12)<<12;
672     case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000C00ULL)>>10)<<10;
673     case 10: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000300ULL)>> 8)<< 8;
674     case  8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000C0ULL)>> 6)<< 6;
675     case  6: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000030ULL)>> 4)<< 4;
676     case  4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000CULL)>> 2)<< 2;
677     case  2: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000003ULL)>> 0)<< 0;
678     };
679     _a[j] = __t;
680 
681   } else if(A->w == 4) {
682     switch( (start/4)%16 ) {
683     case  0: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0;  __a >>= 4;
684     case  1: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4;  __a >>= 4;
685     case  2: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8;  __a >>= 4;
686     case  3: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12;  __a >>= 4;
687     case  4: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16;  __a >>= 4;
688     case  5: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20;  __a >>= 4;
689     case  6: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24;  __a >>= 4;
690     case  7: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28;  __a >>= 4;
691     case  8: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32;  __a >>= 4;
692     case  9: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36;  __a >>= 4;
693     case 10: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40;  __a >>= 4;
694     case 11: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44;  __a >>= 4;
695     case 12: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48;  __a >>= 4;
696     case 13: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52;  __a >>= 4;
697     case 14: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56;  __a >>= 4;
698     case 15: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60;  break;
699     default: m4ri_die("impossible");
700     }
701     if(A->x->width-startblock == 1) {
702       _a[startblock] &= ~(bitmask_begin & bitmask_end);
703       _a[startblock] ^= __t & bitmask_begin & bitmask_end;
704       return;
705     } else {
706       _a[startblock] &= ~bitmask_begin;
707       _a[startblock] ^= __t & bitmask_begin;
708     }
709 
710     for(j=startblock+1; j<A->x->width -1; j++) {
711       __a = _a[j], __t = 0;
712       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0;  __a >>= 4;
713       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4;  __a >>= 4;
714       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8;  __a >>= 4;
715       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12;  __a >>= 4;
716       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16;  __a >>= 4;
717       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20;  __a >>= 4;
718       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24;  __a >>= 4;
719       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28;  __a >>= 4;
720       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32;  __a >>= 4;
721       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36;  __a >>= 4;
722       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40;  __a >>= 4;
723       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44;  __a >>= 4;
724       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48;  __a >>= 4;
725       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52;  __a >>= 4;
726       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56;  __a >>= 4;
727       __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60;
728       _a[j] = __t;
729     }
730 
731     __t = _a[j] & ~bitmask_end;
732     switch(A->x->ncols % m4ri_radix) {
733     case  0: __t ^= ff->mul(ff, x, (_a[j] & 0xF000000000000000ULL)>>60)<<60;
734     case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0F00000000000000ULL)>>56)<<56;
735     case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00F0000000000000ULL)>>52)<<52;
736     case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000F000000000000ULL)>>48)<<48;
737     case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000F00000000000ULL)>>44)<<44;
738     case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000F0000000000ULL)>>40)<<40;
739     case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000F000000000ULL)>>36)<<36;
740     case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000F00000000ULL)>>32)<<32;
741     case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000F0000000ULL)>>28)<<28;
742     case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000F000000ULL)>>24)<<24;
743     case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000F00000ULL)>>20)<<20;
744     case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000F0000ULL)>>16)<<16;
745     case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000F000ULL)>>12)<<12;
746     case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000F00ULL)>> 8)<< 8;
747     case  8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000F0ULL)>> 4)<< 4;
748     case  4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000FULL)>> 0)<< 0;
749     };
750     _a[j] = __t;
751 
752   } else if (A->w == 8) {
753 
754     register word __a0 = _a[startblock]>>(start%m4ri_radix);
755     register word __a1;
756     register word __t0 = 0;
757     register word __t1;
758 
759     switch( (start/8) %8 ) {
760     case 0: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 0; __a0 >>= 8;
761     case 1: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 8; __a0 >>= 8;
762     case 2: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<16; __a0 >>= 8;
763     case 3: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<24; __a0 >>= 8;
764     case 4: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<32; __a0 >>= 8;
765     case 5: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<40; __a0 >>= 8;
766     case 6: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<48; __a0 >>= 8;
767     case 7: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<56; break;
768     default: m4ri_die("impossible");
769     }
770     if(A->x->width-startblock == 1) {
771       _a[startblock] &= ~(bitmask_begin & bitmask_end);
772       _a[startblock] ^= __t0 & bitmask_begin & bitmask_end;
773       return;
774     } else {
775       _a[startblock] &= ~bitmask_begin;
776       _a[startblock] ^= __t0 & bitmask_begin;
777     }
778 
779     for(j=startblock+1; j+2 < A->x->width; j+=2) {
780       __a0 = _a[j], __t0 = 0;
781       __a1 = _a[j+1], __t1 = 0;
782       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
783       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 0; __a1 >>= 8;
784       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
785       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 8; __a1 >>= 8;
786       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
787       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<16; __a1 >>= 8;
788       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
789       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<24; __a1 >>= 8;
790       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
791       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<32; __a1 >>= 8;
792       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
793       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<40; __a1 >>= 8;
794       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
795       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<48; __a1 >>= 8;
796       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56; __a0 >>= 8;
797       __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<56;
798       _a[j+0] = __t0;
799       _a[j+1] = __t1;
800     }
801 
802     for(; j < A->x->width-1; j++) {
803       __a0 = _a[j], __t0 = 0;
804       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
805       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
806       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
807       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
808       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
809       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
810       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
811       __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56;
812       _a[j] = __t0;
813     }
814 
815     __t = _a[j] & ~bitmask_end;
816     switch(A->x->ncols % m4ri_radix ) {
817     case  0: __t ^= ff->mul(ff, x, (_a[j] & 0xFF00000000000000ULL)>>56)<<56;
818     case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00FF000000000000ULL)>>48)<<48;
819     case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FF0000000000ULL)>>40)<<40;
820     case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000FF00000000ULL)>>32)<<32;
821     case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FF000000ULL)>>24)<<24;
822     case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000FF0000ULL)>>16)<<16;
823     case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FF00ULL)>> 8)<< 8;
824     case  8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000FFULL)>> 0)<< 0;
825     };
826     _a[j] = __t;
827 
828   } else if (A->w == 16) {
829     switch( (start/16) %4 ) {
830     case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
831     case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
832     case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
833     case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; break;
834     default: m4ri_die("impossible");
835     }
836     if(A->x->width-startblock == 1) {
837       _a[startblock] &= ~(bitmask_begin & bitmask_end);
838       _a[startblock] ^= __t & bitmask_begin & bitmask_end;
839       return;
840     } else {
841       _a[startblock] &= ~bitmask_begin;
842       _a[startblock] ^= __t & bitmask_begin;
843     }
844 
845     for(j=startblock+1; j+4<A->x->width; j+=4) {
846       __a = _a[j], __t = 0;
847       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
848       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
849       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
850       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
851       _a[j] = __t;
852 
853       __a = _a[j+1], __t = 0;
854       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
855       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
856       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
857       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
858       _a[j+1] = __t;
859 
860       __a = _a[j+2], __t = 0;
861       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
862       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
863       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
864       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
865       _a[j+2] = __t;
866 
867       __a = _a[j+3], __t = 0;
868       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
869       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
870       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
871       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
872       _a[j+3] = __t;
873     }
874     for( ; j<A->x->width-1; j++) {
875       __a = _a[j], __t = 0;
876       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
877       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
878       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
879       __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
880       _a[j] = __t;
881     }
882 
883     __t = _a[j] & ~bitmask_end;
884     switch(A->x->ncols % m4ri_radix) {
885     case  0: __t ^= ff->mul(ff, x, (_a[j] & 0xFFFF000000000000ULL)>>48)<<48;
886     case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FFFF00000000ULL)>>32)<<32;
887     case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FFFF0000ULL)>>16)<<16;
888     case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FFFFULL)>> 0)<< 0;
889     };
890     _a[j] = __t;
891 
892   } else {
893     for(rci_t j=start_col; j<A->ncols; j++) {
894       mzed_write_elem(A, r, j, ff->mul(ff, x, mzed_read_elem(A, r, j)));
895     }
896   }
897 }
898 
899 /**
900  * \brief Swap the two rows rowa and rowb.
901  *
902  * \param M Matrix
903  * \param rowa Row index.
904  * \param rowb Row index.
905  *
906  * \ingroup RowOperations
907  */
908 
mzed_row_swap(mzed_t * M,const rci_t rowa,const rci_t rowb)909 static inline void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb) {
910   mzd_row_swap(M->x, rowa, rowb);
911 }
912 
913 /**
914  * \brief copy row j from A to row i from B.
915  *
916  * The the number of columns of A must be less than or equal to the number of columns of B.
917  *
918  * \param B Target matrix.
919  * \param i Target row index.
920  * \param A Source matrix.
921  * \param j Source row index.
922  *
923  * \ingroup RowOperations
924  */
925 
mzed_copy_row(mzed_t * B,rci_t i,const mzed_t * A,rci_t j)926 static inline void mzed_copy_row(mzed_t* B, rci_t i, const mzed_t* A, rci_t j) {
927   mzd_copy_row(B->x, i, A->x, j);
928 }
929 
930 /**
931  * \brief Swap the two columns cola and colb.
932  *
933  * \param M Matrix.
934  * \param cola Column index.
935  * \param colb Column index.
936  *
937  * \ingroup RowOperations
938  */
939 
mzed_col_swap(mzed_t * M,const rci_t cola,const rci_t colb)940 static inline void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb) {
941   for(rci_t i=0; i<M->w; i++)
942     mzd_col_swap(M->x,M->w*cola+i, M->w*colb+i);
943 }
944 
945 /**
946  * \brief Swap the two columns cola and colb but only between start_row and stop_row.
947  *
948  * \param A Matrix.
949  * \param cola Column index.
950  * \param colb Column index.
951  * \param start_row Row index.
952  * \param stop_row Row index (exclusive).
953  *
954  * \ingroup RowOperations
955  */
956 
mzed_col_swap_in_rows(mzed_t * A,const rci_t cola,const rci_t colb,const rci_t start_row,rci_t stop_row)957 static inline void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row) {
958   for(unsigned int e=0; e < A->finite_field->degree; e++) {
959     mzd_col_swap_in_rows(A->x, A->w*cola+e, A->w*colb+e, start_row, stop_row);
960   };
961 }
962 
963 /**
964  * \brief Add the rows sourcerow and destrow and stores the total in
965  * the row destrow.
966  *
967  * \param M Matrix
968  * \param sourcerow Index of source row
969  * \param destrow Index of target row
970  *
971  * \note this can be done much faster with mzed_combine.
972  *
973  * \ingroup RowOperations
974  */
975 
mzed_row_add(mzed_t * M,const rci_t sourcerow,const rci_t destrow)976 static inline void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow) {
977   mzd_row_add(M->x, sourcerow, destrow);
978 }
979 
980 /**
981  * \brief Return the first row with all zero entries.
982  *
983  * If no such row can be found returns nrows.
984  *
985  * \param A Matrix
986  *
987  * \ingroup RowOperations
988  */
989 
mzed_first_zero_row(mzed_t * A)990 static inline rci_t mzed_first_zero_row(mzed_t *A) {
991   return mzd_first_zero_row(A->x);
992 }
993 
994 
995 /**
996  * \brief Gaussian elimination.
997  *
998  * Perform Gaussian elimination on the matrix A.  If full=0, then it
999  * will do triangular style elimination, and if full=1, it will do
1000  * Gauss-Jordan style, or full elimination.
1001  *
1002  * \param A Matrix
1003  * \param full Gauss-Jordan style or upper unit-triangular form only.
1004  *
1005  * \ingroup Echelon
1006  */
1007 
1008 rci_t mzed_echelonize_naive(mzed_t *A, int full);
1009 
1010 /**
1011  * \brief Print a matrix to stdout.
1012  *
1013  * \param M Matrix
1014  *
1015  * \ingroup StringConversions
1016  */
1017 
1018 void mzed_print(const mzed_t *M);
1019 
1020 #endif //M4RIE_MATRIX_H
1021