1 /**
2 * \file mzed.h
3 *
4 * \brief Dense matrices over \GF2E represented as packed matrices.
5 *
6 * This file implements the data type mzed_t. That is, matrices over
7 * \GF2E in row major representation.
8
9 * For example, let \f$ a = \sum a_i x_i / <f>\f$ and \f$b = \sum b_i x_i / <f>\f$
10 * be elements in \f$\mathbb{F}_{2^6}\f$ with minimal polynomial \f$f\f$. Then, the
11 * \f$ 1 \times 2\f$ matrix [b a] would be stored as
12 \verbatim
13 [...| 0 0 b5 b4 b3 b2 b1 b0 | 0 0 a5 a4 a3 a2 a1 a0]
14 \endverbatim
15 *
16 * Internally M4RI matrices are used to store bits with allows to
17 * re-use existing M4RI methods (such as mzd_add) when implementing
18 * functions for mzed_t.
19 *
20 * This data type is preferable when Newton-John tables ought be used
21 * or when the matrix is small (\f$ m \times n \times e < L2\f$).
22 *
23 * \author Martin Albrecht <martinralbrecht@googlemail.com>
24 */
25
26
27 #ifndef M4RIE_MZED_H
28 #define M4RIE_MZED_H
29
30 /******************************************************************************
31 *
32 * M4RIE: Linear Algebra over GF(2^e)
33 *
34 * Copyright (C) 2010,2011 Martin Albrecht <martinralbrecht@googlemail.com>
35 *
36 * Distributed under the terms of the GNU General Public License (GEL)
37 * version 2 or higher.
38 *
39 * This code is distributed in the hope that it will be useful,
40 * but WITHOUT ANY WARRANTY; without even the implied warranty of
41 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
42 * General Public License for more details.
43 *
44 * The full text of the GPL is available at:
45 *
46 * http://www.gnu.org/licenses/
47 ******************************************************************************/
48
49 #include <m4ri/m4ri.h>
50 #include <m4rie/gf2e.h>
51 #include <m4rie/m4ri_functions.h>
52
53 /**
54 * \brief Dense matrices over \GF2E represented as packed matrices.
55 *
56 * \ingroup Definitions
57 */
58
59 typedef struct {
60 mzd_t *x; /**< \f$m \times n\f$ matrices over \GF2E are represented as \f$m \times (en)\f$ matrices over \GF2. */
61 const gf2e *finite_field; /**< A finite field \GF2E. */
62 rci_t nrows; /**< Number of rows. */
63 rci_t ncols; /**< Number of columns. */
64 wi_t w; /**< The internal width of elements (must divide 64). */
65 } mzed_t;
66
67
68 /**
69 * \brief Create a new matrix of dimension m x n over ff
70 *
71 * Use mzed_free() to kill it.
72 *
73 * \param ff Finite field
74 * \param m Number of rows
75 * \param n Number of columns
76 *
77 * \ingroup Constructions
78 */
79
80 mzed_t *mzed_init(const gf2e *ff, const rci_t m, const rci_t n);
81
82 /**
83 * \brief Free a matrix created with mzed_init().
84 *
85 * \param A Matrix
86 *
87 * \ingroup Constructions
88 */
89
90 void mzed_free(mzed_t *A);
91
92
93 /**
94 * \brief Concatenate B to A and write the result to C.
95 *
96 * That is,
97 \verbatim
98 [ A ], [ B ] -> [ A B ] = C
99 \endverbatim
100 * The inputs are not modified but a new matrix is created.
101 *
102 * \param C Matrix, may be NULL for automatic creation
103 * \param A Matrix
104 * \param B Matrix
105 *
106 * \note This is sometimes called augment.
107 *
108 * \ingroup Constructions
109 */
110
mzed_concat(mzed_t * C,const mzed_t * A,const mzed_t * B)111 static inline mzed_t *mzed_concat(mzed_t *C, const mzed_t *A, const mzed_t *B) {
112 if(C==NULL)
113 C = mzed_init(A->finite_field, A->nrows, A->ncols + B->ncols);
114 mzd_concat(C->x, A->x, B->x);
115 return C;
116 }
117
118 /**
119 * \brief Stack A on top of B and write the result to C.
120 *
121 * That is,
122 \verbatim
123 [ A ], [ B ] -> [ A ] = C
124 [ B ]
125 \endverbatim
126 * The inputs are not modified but a new matrix is created.
127 *
128 * \param C Matrix, may be NULL for automatic creation
129 * \param A Matrix
130 * \param B Matrix
131 *
132 * \ingroup Constructions
133 */
134
mzed_stack(mzed_t * C,const mzed_t * A,const mzed_t * B)135 static inline mzed_t *mzed_stack(mzed_t *C, const mzed_t *A, const mzed_t *B) {
136 if(C==NULL)
137 C = mzed_init(A->finite_field, A->nrows + B->nrows, A->ncols);
138 mzd_stack(C->x, A->x, B->x);
139 return C;
140 }
141
142
143 /**
144 * \brief Copy a submatrix.
145 *
146 * Note that the upper bounds are not included.
147 *
148 * \param S Preallocated space for submatrix, may be NULL for automatic creation.
149 * \param M Matrix
150 * \param lowr start rows
151 * \param lowc start column
152 * \param highr stop row (this row is \em not included)
153 * \param highc stop column (this column is \em not included)
154 *
155 * \ingroup Constructions
156 */
mzed_submatrix(mzed_t * S,const mzed_t * M,const rci_t lowr,const rci_t lowc,const rci_t highr,const rci_t highc)157 static inline mzed_t *mzed_submatrix(mzed_t *S, const mzed_t *M, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
158 if(S==NULL)
159 S = mzed_init(M->finite_field, highr - lowr, highc - lowc);
160
161 mzd_submatrix(S->x, M->x, lowr, lowc*M->w, highr, highc*M->w);
162 return S;
163 }
164
165 /**
166 * \brief Create a window/view into the matrix A.
167 *
168 * A matrix window for A is a meta structure on the matrix A. It is
169 * setup to point into the matrix so M \em must \em not be freed while
170 * the matrix window is used.
171 *
172 * This function puts the restriction on the provided parameters that
173 * all parameters must be within range for A which is not currently
174 * enforced.
175 *
176 * Use mzed_free_window() to free the window.
177 *
178 * \param A Matrix
179 * \param lowr Starting row (inclusive)
180 * \param lowc Starting column (inclusive)
181 * \param highr End row (exclusive)
182 * \param highc End column (exclusive)
183 *
184 * \ingroup Constructions
185 */
186
mzed_init_window(const mzed_t * A,const rci_t lowr,const rci_t lowc,const rci_t highr,const rci_t highc)187 static inline mzed_t *mzed_init_window(const mzed_t *A, const rci_t lowr, const rci_t lowc, const rci_t highr, const rci_t highc) {
188 mzed_t *B = (mzed_t *)m4ri_mm_malloc(sizeof(mzed_t));
189 B->finite_field = A->finite_field;
190 B->w = gf2e_degree_to_w(A->finite_field);
191 B->nrows = highr - lowr;
192 B->ncols = highc - lowc;
193 B->x = mzd_init_window(A->x, lowr, B->w*lowc, highr, B->w*highc);
194 return B;
195 }
196
197 /**
198 * \brief Free a matrix window created with mzed_init_window().
199 *
200 * \param A Matrix
201 *
202 * \ingroup Constructions
203 */
204
mzed_free_window(mzed_t * A)205 static inline void mzed_free_window(mzed_t *A) {
206 mzd_free_window(A->x);
207 m4ri_mm_free(A);
208 }
209
210 /**
211 * \brief \f$ C = A+B \f$.
212 *
213 * C is also returned. If C is NULL then a new matrix is created which
214 * must be freed by mzed_free().
215 *
216 * \param C Preallocated sum matrix, may be NULL for automatic creation.
217 * \param A Matrix
218 * \param B Matrix
219 *
220 * \ingroup Addition
221 */
222
223 mzed_t *mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
224
225 /**
226 * \brief \f$ C = A+B \f$.
227 *
228 * \param C Preallocated sum matrix, may be NULL for automatic creation.
229 * \param A Matrix
230 * \param B Matrix
231 *
232 * \ingroup Addition
233 */
234
235 mzed_t *_mzed_add(mzed_t *C, const mzed_t *A, const mzed_t *B);
236
237 /**
238 * \brief \f$ C = A+B \f$.
239 *
240 * \param C Preallocated difference matrix, may be NULL for automatic creation.
241 * \param A Matrix
242 * \param B Matrix
243 *
244 * \ingroup Addition
245 */
246
247 #define mzed_sub mzed_add
248
249 /**
250 * \brief \f$ C = A+B \f$.
251 *
252 * \param C Preallocated difference matrix, may be NULL for automatic creation.
253 * \param A Matrix
254 * \param B Matrix
255 *
256 * \ingroup Addition
257 */
258
259 #define _mzed_sub _mzed_add
260
261 /**
262 * \brief \f$ C = A \cdot B \f$.
263 *
264 * \param C Preallocated return matrix, may be NULL for automatic creation.
265 * \param A Input matrix A.
266 * \param B Input matrix B.
267 *
268 * \ingroup Multiplication
269 */
270
271 mzed_t *mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
272
273 /**
274 * \brief \f$ C = C + A \cdot B \f$.
275 *
276 * \param C Preallocated product matrix, may be NULL for automatic creation.
277 * \param A Input matrix A.
278 * \param B Input matrix B.
279 *
280 * \ingroup Multiplication
281 */
282
283 mzed_t *mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
284
285 /**
286 * \brief \f$ C = A \cdot B \f$.
287 *
288 * \param C Preallocated product matrix.
289 * \param A Input matrix A.
290 * \param B Input matrix B.
291 *
292 * \ingroup Multiplication
293 */
294
295 mzed_t *_mzed_mul(mzed_t *C, const mzed_t *A, const mzed_t *B);
296
297 /**
298 * \brief \f$ C = C + A \cdot B \f$.
299 *
300 * \param C Preallocated product matrix.
301 * \param A Input matrix A.
302 * \param B Input matrix B.
303 *
304 * \ingroup Multiplication
305 */
306
307 mzed_t *_mzed_addmul(mzed_t *C, const mzed_t *A, const mzed_t *B);
308
309
310 /**
311 * \brief \f$ C = C + A \cdot B \f$ using naive cubic multiplication.
312 *
313 * \param C Preallocated product matrix.
314 * \param A Input matrix A.
315 * \param B Input matrix B.
316 *
317 * \note There is no reason to call this function except for checking
318 * the correctness of other algorithms. It is very slow.
319 *
320 * \ingroup Multiplication
321 */
322
323 mzed_t *mzed_addmul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
324
325 /**
326 * \brief \f$ C = A \cdot B \f$ using naive cubic multiplication.
327 *
328 * \param C Preallocated product matrix, may be NULL for automatic creation.
329 * \param A Input matrix A.
330 * \param B Input matrix B.
331 *
332 * \note There is no reason to call this function except for checking
333 * the correctness of other algorithms. It is very slow.
334 *
335 * \ingroup Multiplication
336 */
337
338 mzed_t *mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
339
340 /**
341 * \brief \f$ C = C + A \cdot B \f$ using naive cubic multiplication.
342 *
343 * \param C Preallocated product matrix.
344 * \param A Input matrix A.
345 * \param B Input matrix B.
346 *
347 * \ingroup Multiplication
348 */
349
350 mzed_t *_mzed_mul_naive(mzed_t *C, const mzed_t *A, const mzed_t *B);
351
352 /**
353 * \brief \f$ C = a \cdot B \f$.
354 *
355 * \param C Preallocated product matrix or NULL.
356 * \param a finite field element.
357 * \param B Input matrix B.
358 *
359 * \ingroup Multiplication
360 */
361
362 mzed_t *mzed_mul_scalar(mzed_t *C, const word a, const mzed_t *B);
363
364 /**
365 * Check whether C, A and B match in sizes and fields for
366 * multiplication
367 *
368 * \param C Output matrix, if NULL a new matrix is created.
369 * \param A Input matrix.
370 * \param B Input matrix.
371 * \param clear Write zeros to C or not.
372 */
373
374 mzed_t *_mzed_mul_init(mzed_t *C, const mzed_t *A, const mzed_t *B, int clear);
375
376 /**
377 * \brief Fill matrix A with random elements.
378 *
379 * \param A Matrix
380 *
381 * \todo Allow the user to provide a RNG callback.
382 *
383 * \ingroup Assignment
384 */
385
386 void mzed_randomize(mzed_t *A);
387
388 /**
389 * \brief Copy matrix A to B.
390 *
391 * \param B May be NULL for automatic creation.
392 * \param A Source matrix.
393 *
394 * \ingroup Assignment
395 */
396
397 mzed_t *mzed_copy(mzed_t *B, const mzed_t *A);
398
399 /**
400 * \brief Return diagonal matrix with value on the diagonal.
401 *
402 * If the matrix is not square then the largest possible square
403 * submatrix is used.
404 *
405 * \param A Matrix
406 * \param value Finite Field element
407 *
408 * \ingroup Assignment
409 */
410
411 void mzed_set_ui(mzed_t *A, word value);
412
413
414 /**
415 * \brief Get the element at position (row,col) from the matrix A.
416 *
417 * \param A Source matrix.
418 * \param row Starting row.
419 * \param col Starting column.
420 *
421 * \ingroup Assignment
422 */
423
mzed_read_elem(const mzed_t * A,const rci_t row,const rci_t col)424 static inline word mzed_read_elem(const mzed_t *A, const rci_t row, const rci_t col) {
425 return __mzd_read_bits(A->x, row, A->w*col, A->w);
426 }
427
428 /**
429 * \brief At the element elem to the element at position (row,col) in the matrix A.
430 *
431 * \param A Target matrix.
432 * \param row Starting row.
433 * \param col Starting column.
434 * \param elem finite field element.
435 *
436 * \ingroup Assignment
437 */
438
mzed_add_elem(mzed_t * A,const rci_t row,const rci_t col,const word elem)439 static inline void mzed_add_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
440 __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
441 }
442
443 /**
444 * \brief Write the element elem to the position (row,col) in the matrix A.
445 *
446 * \param A Target matrix.
447 * \param row Starting row.
448 * \param col Starting column.
449 * \param elem finite field element.
450 *
451 * \ingroup Assignment
452 */
453
mzed_write_elem(mzed_t * A,const rci_t row,const rci_t col,const word elem)454 static inline void mzed_write_elem(mzed_t *A, const rci_t row, const rci_t col, const word elem) {
455 __mzd_clear_bits(A->x, row, A->w*col, A->w);
456 __mzd_xor_bits(A->x, row, A->w*col, A->w, elem);
457 }
458
459 /**
460 * \brief Return -1,0,1 if if A < B, A == B or A > B respectively.
461 *
462 * \param A Matrix.
463 * \param B Matrix.
464 *
465 * \note This comparison is not well defined mathematically and
466 * relatively arbitrary since elements of \GF2E don't have an
467 * ordering.
468 *
469 * \ingroup Comparison
470 */
471
mzed_cmp(mzed_t * A,mzed_t * B)472 static inline int mzed_cmp(mzed_t *A, mzed_t *B) {
473 return mzd_cmp(A->x,B->x);
474 }
475
476
477 /**
478 * \brief Zero test for matrix.
479 *
480 * \param A Input matrix.
481 *
482 * \ingroup Comparison
483 */
mzed_is_zero(const mzed_t * A)484 static inline int mzed_is_zero(const mzed_t *A) {
485 return mzd_is_zero(A->x);
486 }
487
488 /**
489 * A[ar,c] = A[ar,c] + x*B[br,c] for all c >= startcol.
490 *
491 * \param A Matrix.
492 * \param ar Row index in A.
493 * \param B Matrix.
494 * \param br Row index in B.
495 * \param x Finite field element.
496 * \param start_col Column index.
497 *
498 * \ingroup RowOperations
499 */
500
501 void mzed_add_multiple_of_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, word x, rci_t start_col);
502
503 /**
504 * A[ar,c] = A[ar,c] + B[br,c] for all c >= startcol.
505 *
506 * \param A Matrix.
507 * \param ar Row index in A.
508 * \param B Matrix.
509 * \param br Row index in B.
510 * \param start_col Column index.
511 *
512 * \ingroup RowOperations
513 */
514
mzed_add_row(mzed_t * A,rci_t ar,const mzed_t * B,rci_t br,rci_t start_col)515 static inline void mzed_add_row(mzed_t *A, rci_t ar, const mzed_t *B, rci_t br, rci_t start_col) {
516 assert(A->ncols == B->ncols && A->finite_field == B->finite_field);
517 assert(start_col < A->ncols);
518
519 const rci_t start = A->w*start_col;
520 const wi_t startblock = start/m4ri_radix;
521 const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
522 const word bitmask_end = A->x->high_bitmask;
523
524 word *_a = A->x->rows[ar];
525 const word *_b = B->x->rows[br];
526 wi_t j;
527
528 if (A->x->width - startblock > 1) {
529 _a[startblock] ^= _b[startblock] & bitmask_begin;
530 for(j=startblock+1; j<A->x->width-1; j++)
531 _a[j] ^= _b[j];
532 _a[j] ^= _b[j] & bitmask_end;
533 } else {
534 _a[startblock] ^= _b[startblock] & (bitmask_begin & bitmask_end);
535 }
536 }
537
538 /**
539 * \brief Rescale the row r in A by X starting c.
540 *
541 * \param A Matrix
542 * \param r Row index.
543 * \param start_col Column index.
544 * \param x Multiplier
545 *
546 * \ingroup RowOperations
547 */
548
mzed_rescale_row(mzed_t * A,rci_t r,rci_t start_col,const word x)549 static inline void mzed_rescale_row(mzed_t *A, rci_t r, rci_t start_col, const word x) {
550 assert(start_col < A->ncols);
551
552 const gf2e *ff = A->finite_field;
553 const rci_t start = A->w*start_col;
554 const wi_t startblock = start/m4ri_radix;
555 word *_a = A->x->rows[r];
556 const word bitmask_begin = __M4RI_RIGHT_BITMASK(m4ri_radix - (start%m4ri_radix));
557 const word bitmask_end = A->x->high_bitmask;
558 register word __a = _a[startblock]>>(start%m4ri_radix);
559 register word __t = 0;
560 int j;
561
562 if(A->w == 2) {
563 switch( (start/2) % 32 ) {
564 case 0: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2;
565 case 1: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2;
566 case 2: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2;
567 case 3: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2;
568 case 4: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2;
569 case 5: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10; __a >>= 2;
570 case 6: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2;
571 case 7: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2;
572 case 8: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2;
573 case 9: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2;
574 case 10: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2;
575 case 11: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2;
576 case 12: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2;
577 case 13: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2;
578 case 14: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2;
579 case 15: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2;
580 case 16: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2;
581 case 17: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2;
582 case 18: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2;
583 case 19: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2;
584 case 20: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2;
585 case 21: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2;
586 case 22: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2;
587 case 23: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2;
588 case 24: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48; __a >>= 2;
589 case 25: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2;
590 case 26: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2;
591 case 27: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2;
592 case 28: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2;
593 case 29: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2;
594 case 30: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2;
595 case 31: __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62; break;
596 default: m4ri_die("impossible");
597 }
598 if(A->x->width-startblock == 1) {
599 _a[startblock] &= ~(bitmask_begin & bitmask_end);
600 _a[startblock] ^= __t & bitmask_begin & bitmask_end;
601 return;
602 } else {
603 _a[startblock] &= ~bitmask_begin;
604 _a[startblock] ^= __t & bitmask_begin;
605 }
606
607 for(j=startblock+1; j<A->x->width -1; j++) {
608 __a = _a[j], __t = 0;
609 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 0; __a >>= 2;
610 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 2; __a >>= 2;
611 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 4; __a >>= 2;
612 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 6; __a >>= 2;
613 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<< 8; __a >>= 2;
614 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<10; __a >>= 2;
615 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<12; __a >>= 2;
616 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<14; __a >>= 2;
617 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<16; __a >>= 2;
618 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<18; __a >>= 2;
619 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<20; __a >>= 2;
620 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<22; __a >>= 2;
621 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<24; __a >>= 2;
622 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<26; __a >>= 2;
623 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<28; __a >>= 2;
624 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<30; __a >>= 2;
625 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<32; __a >>= 2;
626 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<34; __a >>= 2;
627 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<36; __a >>= 2;
628 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<38; __a >>= 2;
629 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<40; __a >>= 2;
630 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<42; __a >>= 2;
631 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<44; __a >>= 2;
632 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<46; __a >>= 2;
633 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<48; __a >>= 2;
634 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<50; __a >>= 2;
635 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<52; __a >>= 2;
636 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<54; __a >>= 2;
637 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<56; __a >>= 2;
638 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<58; __a >>= 2;
639 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<60; __a >>= 2;
640 __t ^= ff->mul(ff, x, __a & 0x0000000000000003ULL)<<62;
641 _a[j] = __t;
642 }
643
644 __t = _a[j] & ~bitmask_end;
645 switch(A->x->ncols % m4ri_radix) {
646 case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xC000000000000000ULL)>>62)<<62;
647 case 62: __t ^= ff->mul(ff, x, (_a[j] & 0x3000000000000000ULL)>>60)<<60;
648 case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0C00000000000000ULL)>>58)<<58;
649 case 58: __t ^= ff->mul(ff, x, (_a[j] & 0x0300000000000000ULL)>>56)<<56;
650 case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00C0000000000000ULL)>>54)<<54;
651 case 54: __t ^= ff->mul(ff, x, (_a[j] & 0x0030000000000000ULL)>>52)<<52;
652 case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000C000000000000ULL)>>50)<<50;
653 case 50: __t ^= ff->mul(ff, x, (_a[j] & 0x0003000000000000ULL)>>48)<<48;
654 case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000C00000000000ULL)>>46)<<46;
655 case 46: __t ^= ff->mul(ff, x, (_a[j] & 0x0000300000000000ULL)>>44)<<44;
656 case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000C0000000000ULL)>>42)<<42;
657 case 42: __t ^= ff->mul(ff, x, (_a[j] & 0x0000030000000000ULL)>>40)<<40;
658 case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000C000000000ULL)>>38)<<38;
659 case 38: __t ^= ff->mul(ff, x, (_a[j] & 0x0000003000000000ULL)>>36)<<36;
660 case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000C00000000ULL)>>34)<<34;
661 case 34: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000300000000ULL)>>32)<<32;
662 case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000C0000000ULL)>>30)<<30;
663 case 30: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000030000000ULL)>>28)<<28;
664 case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000C000000ULL)>>26)<<26;
665 case 26: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000003000000ULL)>>24)<<24;
666 case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000C00000ULL)>>22)<<22;
667 case 22: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000300000ULL)>>20)<<20;
668 case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000C0000ULL)>>18)<<18;
669 case 18: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000030000ULL)>>16)<<16;
670 case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000C000ULL)>>14)<<14;
671 case 14: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000003000ULL)>>12)<<12;
672 case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000C00ULL)>>10)<<10;
673 case 10: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000300ULL)>> 8)<< 8;
674 case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000C0ULL)>> 6)<< 6;
675 case 6: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000030ULL)>> 4)<< 4;
676 case 4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000CULL)>> 2)<< 2;
677 case 2: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000003ULL)>> 0)<< 0;
678 };
679 _a[j] = __t;
680
681 } else if(A->w == 4) {
682 switch( (start/4)%16 ) {
683 case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4;
684 case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4;
685 case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4;
686 case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4;
687 case 4: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4;
688 case 5: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4;
689 case 6: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4;
690 case 7: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4;
691 case 8: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4;
692 case 9: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4;
693 case 10: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4;
694 case 11: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4;
695 case 12: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4;
696 case 13: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52; __a >>= 4;
697 case 14: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4;
698 case 15: __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60; break;
699 default: m4ri_die("impossible");
700 }
701 if(A->x->width-startblock == 1) {
702 _a[startblock] &= ~(bitmask_begin & bitmask_end);
703 _a[startblock] ^= __t & bitmask_begin & bitmask_end;
704 return;
705 } else {
706 _a[startblock] &= ~bitmask_begin;
707 _a[startblock] ^= __t & bitmask_begin;
708 }
709
710 for(j=startblock+1; j<A->x->width -1; j++) {
711 __a = _a[j], __t = 0;
712 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 0; __a >>= 4;
713 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 4; __a >>= 4;
714 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<< 8; __a >>= 4;
715 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<12; __a >>= 4;
716 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<16; __a >>= 4;
717 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<20; __a >>= 4;
718 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<24; __a >>= 4;
719 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<28; __a >>= 4;
720 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<32; __a >>= 4;
721 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<36; __a >>= 4;
722 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<40; __a >>= 4;
723 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<44; __a >>= 4;
724 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<48; __a >>= 4;
725 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<52; __a >>= 4;
726 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<56; __a >>= 4;
727 __t ^= ff->mul(ff, x, __a & 0x000000000000000FULL)<<60;
728 _a[j] = __t;
729 }
730
731 __t = _a[j] & ~bitmask_end;
732 switch(A->x->ncols % m4ri_radix) {
733 case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xF000000000000000ULL)>>60)<<60;
734 case 60: __t ^= ff->mul(ff, x, (_a[j] & 0x0F00000000000000ULL)>>56)<<56;
735 case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00F0000000000000ULL)>>52)<<52;
736 case 52: __t ^= ff->mul(ff, x, (_a[j] & 0x000F000000000000ULL)>>48)<<48;
737 case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000F00000000000ULL)>>44)<<44;
738 case 44: __t ^= ff->mul(ff, x, (_a[j] & 0x00000F0000000000ULL)>>40)<<40;
739 case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000F000000000ULL)>>36)<<36;
740 case 36: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000F00000000ULL)>>32)<<32;
741 case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000F0000000ULL)>>28)<<28;
742 case 28: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000F000000ULL)>>24)<<24;
743 case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000F00000ULL)>>20)<<20;
744 case 20: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000F0000ULL)>>16)<<16;
745 case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000F000ULL)>>12)<<12;
746 case 12: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000000F00ULL)>> 8)<< 8;
747 case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000F0ULL)>> 4)<< 4;
748 case 4: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000000FULL)>> 0)<< 0;
749 };
750 _a[j] = __t;
751
752 } else if (A->w == 8) {
753
754 register word __a0 = _a[startblock]>>(start%m4ri_radix);
755 register word __a1;
756 register word __t0 = 0;
757 register word __t1;
758
759 switch( (start/8) %8 ) {
760 case 0: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 0; __a0 >>= 8;
761 case 1: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<< 8; __a0 >>= 8;
762 case 2: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<16; __a0 >>= 8;
763 case 3: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<24; __a0 >>= 8;
764 case 4: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<32; __a0 >>= 8;
765 case 5: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<40; __a0 >>= 8;
766 case 6: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<48; __a0 >>= 8;
767 case 7: __t0 ^= ff->mul(ff, x, (__a0 & 0x00000000000000FFULL))<<56; break;
768 default: m4ri_die("impossible");
769 }
770 if(A->x->width-startblock == 1) {
771 _a[startblock] &= ~(bitmask_begin & bitmask_end);
772 _a[startblock] ^= __t0 & bitmask_begin & bitmask_end;
773 return;
774 } else {
775 _a[startblock] &= ~bitmask_begin;
776 _a[startblock] ^= __t0 & bitmask_begin;
777 }
778
779 for(j=startblock+1; j+2 < A->x->width; j+=2) {
780 __a0 = _a[j], __t0 = 0;
781 __a1 = _a[j+1], __t1 = 0;
782 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
783 __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 0; __a1 >>= 8;
784 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
785 __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<< 8; __a1 >>= 8;
786 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
787 __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<16; __a1 >>= 8;
788 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
789 __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<24; __a1 >>= 8;
790 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
791 __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<32; __a1 >>= 8;
792 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
793 __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<40; __a1 >>= 8;
794 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
795 __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<48; __a1 >>= 8;
796 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56; __a0 >>= 8;
797 __t1 ^= ff->mul(ff, x, __a1 & 0x00000000000000FFULL)<<56;
798 _a[j+0] = __t0;
799 _a[j+1] = __t1;
800 }
801
802 for(; j < A->x->width-1; j++) {
803 __a0 = _a[j], __t0 = 0;
804 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 0; __a0 >>= 8;
805 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<< 8; __a0 >>= 8;
806 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<16; __a0 >>= 8;
807 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<24; __a0 >>= 8;
808 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<32; __a0 >>= 8;
809 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<40; __a0 >>= 8;
810 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<48; __a0 >>= 8;
811 __t0 ^= ff->mul(ff, x, __a0 & 0x00000000000000FFULL)<<56;
812 _a[j] = __t0;
813 }
814
815 __t = _a[j] & ~bitmask_end;
816 switch(A->x->ncols % m4ri_radix ) {
817 case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xFF00000000000000ULL)>>56)<<56;
818 case 56: __t ^= ff->mul(ff, x, (_a[j] & 0x00FF000000000000ULL)>>48)<<48;
819 case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FF0000000000ULL)>>40)<<40;
820 case 40: __t ^= ff->mul(ff, x, (_a[j] & 0x000000FF00000000ULL)>>32)<<32;
821 case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FF000000ULL)>>24)<<24;
822 case 24: __t ^= ff->mul(ff, x, (_a[j] & 0x0000000000FF0000ULL)>>16)<<16;
823 case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FF00ULL)>> 8)<< 8;
824 case 8: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000000000FFULL)>> 0)<< 0;
825 };
826 _a[j] = __t;
827
828 } else if (A->w == 16) {
829 switch( (start/16) %4 ) {
830 case 0: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
831 case 1: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
832 case 2: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
833 case 3: __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48; break;
834 default: m4ri_die("impossible");
835 }
836 if(A->x->width-startblock == 1) {
837 _a[startblock] &= ~(bitmask_begin & bitmask_end);
838 _a[startblock] ^= __t & bitmask_begin & bitmask_end;
839 return;
840 } else {
841 _a[startblock] &= ~bitmask_begin;
842 _a[startblock] ^= __t & bitmask_begin;
843 }
844
845 for(j=startblock+1; j+4<A->x->width; j+=4) {
846 __a = _a[j], __t = 0;
847 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
848 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
849 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
850 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
851 _a[j] = __t;
852
853 __a = _a[j+1], __t = 0;
854 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
855 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
856 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
857 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
858 _a[j+1] = __t;
859
860 __a = _a[j+2], __t = 0;
861 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
862 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
863 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
864 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
865 _a[j+2] = __t;
866
867 __a = _a[j+3], __t = 0;
868 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
869 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
870 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
871 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
872 _a[j+3] = __t;
873 }
874 for( ; j<A->x->width-1; j++) {
875 __a = _a[j], __t = 0;
876 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<< 0; __a >>= 16;
877 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<16; __a >>= 16;
878 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<32; __a >>= 16;
879 __t ^= ff->mul(ff, x, __a & 0x000000000000FFFFULL)<<48;
880 _a[j] = __t;
881 }
882
883 __t = _a[j] & ~bitmask_end;
884 switch(A->x->ncols % m4ri_radix) {
885 case 0: __t ^= ff->mul(ff, x, (_a[j] & 0xFFFF000000000000ULL)>>48)<<48;
886 case 48: __t ^= ff->mul(ff, x, (_a[j] & 0x0000FFFF00000000ULL)>>32)<<32;
887 case 32: __t ^= ff->mul(ff, x, (_a[j] & 0x00000000FFFF0000ULL)>>16)<<16;
888 case 16: __t ^= ff->mul(ff, x, (_a[j] & 0x000000000000FFFFULL)>> 0)<< 0;
889 };
890 _a[j] = __t;
891
892 } else {
893 for(rci_t j=start_col; j<A->ncols; j++) {
894 mzed_write_elem(A, r, j, ff->mul(ff, x, mzed_read_elem(A, r, j)));
895 }
896 }
897 }
898
899 /**
900 * \brief Swap the two rows rowa and rowb.
901 *
902 * \param M Matrix
903 * \param rowa Row index.
904 * \param rowb Row index.
905 *
906 * \ingroup RowOperations
907 */
908
mzed_row_swap(mzed_t * M,const rci_t rowa,const rci_t rowb)909 static inline void mzed_row_swap(mzed_t *M, const rci_t rowa, const rci_t rowb) {
910 mzd_row_swap(M->x, rowa, rowb);
911 }
912
913 /**
914 * \brief copy row j from A to row i from B.
915 *
916 * The the number of columns of A must be less than or equal to the number of columns of B.
917 *
918 * \param B Target matrix.
919 * \param i Target row index.
920 * \param A Source matrix.
921 * \param j Source row index.
922 *
923 * \ingroup RowOperations
924 */
925
mzed_copy_row(mzed_t * B,rci_t i,const mzed_t * A,rci_t j)926 static inline void mzed_copy_row(mzed_t* B, rci_t i, const mzed_t* A, rci_t j) {
927 mzd_copy_row(B->x, i, A->x, j);
928 }
929
930 /**
931 * \brief Swap the two columns cola and colb.
932 *
933 * \param M Matrix.
934 * \param cola Column index.
935 * \param colb Column index.
936 *
937 * \ingroup RowOperations
938 */
939
mzed_col_swap(mzed_t * M,const rci_t cola,const rci_t colb)940 static inline void mzed_col_swap(mzed_t *M, const rci_t cola, const rci_t colb) {
941 for(rci_t i=0; i<M->w; i++)
942 mzd_col_swap(M->x,M->w*cola+i, M->w*colb+i);
943 }
944
945 /**
946 * \brief Swap the two columns cola and colb but only between start_row and stop_row.
947 *
948 * \param A Matrix.
949 * \param cola Column index.
950 * \param colb Column index.
951 * \param start_row Row index.
952 * \param stop_row Row index (exclusive).
953 *
954 * \ingroup RowOperations
955 */
956
mzed_col_swap_in_rows(mzed_t * A,const rci_t cola,const rci_t colb,const rci_t start_row,rci_t stop_row)957 static inline void mzed_col_swap_in_rows(mzed_t *A, const rci_t cola, const rci_t colb, const rci_t start_row, rci_t stop_row) {
958 for(unsigned int e=0; e < A->finite_field->degree; e++) {
959 mzd_col_swap_in_rows(A->x, A->w*cola+e, A->w*colb+e, start_row, stop_row);
960 };
961 }
962
963 /**
964 * \brief Add the rows sourcerow and destrow and stores the total in
965 * the row destrow.
966 *
967 * \param M Matrix
968 * \param sourcerow Index of source row
969 * \param destrow Index of target row
970 *
971 * \note this can be done much faster with mzed_combine.
972 *
973 * \ingroup RowOperations
974 */
975
mzed_row_add(mzed_t * M,const rci_t sourcerow,const rci_t destrow)976 static inline void mzed_row_add(mzed_t *M, const rci_t sourcerow, const rci_t destrow) {
977 mzd_row_add(M->x, sourcerow, destrow);
978 }
979
980 /**
981 * \brief Return the first row with all zero entries.
982 *
983 * If no such row can be found returns nrows.
984 *
985 * \param A Matrix
986 *
987 * \ingroup RowOperations
988 */
989
mzed_first_zero_row(mzed_t * A)990 static inline rci_t mzed_first_zero_row(mzed_t *A) {
991 return mzd_first_zero_row(A->x);
992 }
993
994
995 /**
996 * \brief Gaussian elimination.
997 *
998 * Perform Gaussian elimination on the matrix A. If full=0, then it
999 * will do triangular style elimination, and if full=1, it will do
1000 * Gauss-Jordan style, or full elimination.
1001 *
1002 * \param A Matrix
1003 * \param full Gauss-Jordan style or upper unit-triangular form only.
1004 *
1005 * \ingroup Echelon
1006 */
1007
1008 rci_t mzed_echelonize_naive(mzed_t *A, int full);
1009
1010 /**
1011 * \brief Print a matrix to stdout.
1012 *
1013 * \param M Matrix
1014 *
1015 * \ingroup StringConversions
1016 */
1017
1018 void mzed_print(const mzed_t *M);
1019
1020 #endif //M4RIE_MATRIX_H
1021