1 /* ffpack.h
2  * Copyright (C) 2005 Clement Pernet
3  *               2014 FFLAS-FFPACK group
4  *
5  * Written by Clement Pernet <Clement.Pernet@imag.fr>
6  * Brice Boyer (briceboyer) <boyer.brice@gmail.com>
7  *
8  *
9  * ========LICENCE========
10  * This file is part of the library FFLAS-FFPACK.
11  *
12  * FFLAS-FFPACK is free software: you can redistribute it and/or modify
13  * it under the terms of the  GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with this library; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
25  * ========LICENCE========
26  *.
27  */
28 
29 /** @file ffpack.h
30  * \brief Set of elimination based routines for dense linear algebra.
31  * Matrices are supposed over finite prime field of characteristic less than 2^26.
32 
33 */
34 
35 #ifndef __FFLASFFPACK_ffpack_H
36 #define __FFLASFFPACK_ffpack_H
37 
38 #include "givaro/givpoly1.h"
39 #include <fflas-ffpack/fflas-ffpack-config.h>
40 
41 #ifdef __FFLASFFPACK_USE_OPENMP
42 #include <omp.h>
43 #endif
44 
45 #include "fflas-ffpack/fflas/fflas.h"
46 #include "fflas-ffpack/fflas/fflas_helpers.inl"
47 
48 //#include "parallel.h"
49 #include <list>
50 #include <vector>
51 #include <iostream> // std::cout
52 #include <algorithm>
53 
54 #define  __FFLASFFPACK_FTRSTR_THRESHOLD 64
55 #define  __FFLASFFPACK_FTRSSYR2K_THRESHOLD 64
56 
57 /** @brief <b>F</b>inite <b>F</b>ield <b>PACK</b>
58  * Set of elimination based routines for dense linear algebra.
59  *
60  * This namespace enlarges the set of BLAS routines of the class FFLAS, with higher
61  * level routines based on elimination.
62  \ingroup ffpack
63  */
64 namespace FFPACK  { /* tags */
65 
66     /* \cond */
67     enum FFPACK_LU_TAG
68     {
69         FfpackSlabRecursive = 1,
70         FfpackTileRecursive = 2,
71         FfpackSingular = 3,
72         FfpackGaussJordanSlab = 4,
73         FfpackGaussJordanTile = 5
74     };
75 
76     enum FFPACK_CHARPOLY_TAG
77     {
78         FfpackAuto = 0,
79         FfpackDanilevski = 1,
80         FfpackLUK = 2,
81         FfpackArithProgKrylovPrecond = 3,
82         FfpackArithProg = 4,
83         FfpackKG = 5,
84         FfpackKGFast = 6,
85         FfpackHybrid = 7,
86         FfpackKGFastG = 8
87     };
88     /* \endcond */
89     class CharpolyFailed{};
90 
91     /* \cond */
92     enum FFPACK_MINPOLY_TAG
93     {
94         FfpackDense=1,
95         FfpackKGF=2
96     };
97     /* \endcond */
98 
99 }
100 namespace FFPACK { /* Permutations */
101 
102     /*****************/
103     /* PERMUTATIONS  */
104     /*****************/
105 
106 
107     void LAPACKPerm2MathPerm (size_t * MathP, const size_t * LapackP,
108                               const size_t N);
109 
110     void MathPerm2LAPACKPerm (size_t * LapackP, const size_t * MathP,
111                               const size_t N);
112 
113     /* \cond */
114     template <class Field>
115     void MatrixApplyS (const Field& F, typename Field::Element_ptr A, const size_t lda, const size_t width,
116                        const size_t M2,
117                        const size_t R1, const size_t R2,
118                        const size_t R3, const size_t R4);
119 
120     template <class Field>
121     void MatrixApplyS (const Field& F, typename Field::Element_ptr A, const size_t lda,
122                        const size_t width, const size_t M2,
123                        const size_t R1, const size_t R2,
124                        const size_t R3, const size_t R4,
125                        const FFLAS::ParSeqHelper::Sequential seq);
126 
127     template <class Field, class Cut, class Param>
128     void MatrixApplyS (const Field& F, typename Field::Element_ptr A, const size_t lda,
129                        const size_t width, const size_t M2,
130                        const size_t R1, const size_t R2,
131                        const size_t R3, const size_t R4,
132                        const FFLAS::ParSeqHelper::Parallel<Cut, Param> par);
133 
134     template <class Element>
135     void PermApplyS (Element* A, const size_t lda, const size_t width,
136                      const size_t M2,
137                      const size_t R1, const size_t R2,
138                      const size_t R3, const size_t R4);
139 
140     template <class Field>
141     void MatrixApplyT (const Field& F, typename Field::Element_ptr A, const size_t lda, const size_t width,
142                        const size_t N2,
143                        const size_t R1, const size_t R2,
144                        const size_t R3, const size_t R4);
145 
146     template <class Field>
147     void MatrixApplyT (const Field& F, typename Field::Element_ptr A, const size_t lda,
148                        const size_t width, const size_t N2,
149                        const size_t R1, const size_t R2,
150                        const size_t R3, const size_t R4,
151                        const FFLAS::ParSeqHelper::Sequential seq);
152 
153     template <class Field, class Cut, class Param>
154     void MatrixApplyT (const Field& F, typename Field::Element_ptr A, const size_t lda,
155                        const size_t width, const size_t N2,
156                        const size_t R1, const size_t R2,
157                        const size_t R3, const size_t R4,
158                        const FFLAS::ParSeqHelper::Parallel<Cut, Param> par);
159 
160     template <class Element>
161     void PermApplyT (Element* A, const size_t lda, const size_t width,
162                      const size_t N2,
163                      const size_t R1, const size_t R2,
164                      const size_t R3, const size_t R4);
165     /* \endcond */
166 
167     /**
168      * @brief Computes P1 x Diag (I_R, P2) where P1 is a LAPACK and P2 a LAPACK permutation
169      * and store the result in P1 as a LAPACK permutation
170      * @param [inout] P1 a LAPACK permutation of size N
171      * @param P2 a LAPACK permutation of size N-R
172      */
173 
174     /* \cond */
175     inline void composePermutationsLLL (size_t * P1,
176                                         const size_t * P2,
177                                         const size_t R, const size_t N);
178 
179     /**
180      * @brief Computes P1 x Diag (I_R, P2) where P1 is a LAPACK and P2 a LAPACK permutation
181      * and store the result in MathP as a MathPermutation format.
182      * @param [out] MathP a MathPermutation of size N
183      * @param P1 a LAPACK permutation of size N
184      * @param P2 a LAPACK permutation of size N-R
185      */
186     inline void composePermutationsLLM (size_t * MathP,
187                                         const size_t * P1,
188                                         const size_t * P2,
189                                         const size_t R, const size_t N);
190 
191     /**
192      * @brief Computes MathP1 x Diag (I_R, P2) where MathP1 is a MathPermutation and P2 a LAPACK permutation
193      * and store the result in MathP1 as a MathPermutation format.
194      * @param [inout] MathP1 a MathPermutation of size N
195      * @param P2 a LAPACK permutation of size N-R
196      */
197     inline void composePermutationsMLM (size_t * MathP1,
198                                         const size_t * P2,
199                                         const size_t R, const size_t N);
200 
201     void cyclic_shift_mathPerm (size_t * P,  const size_t s);
202     template<typename Base_t>
203     void cyclic_shift_row_col(Base_t * A, size_t m, size_t n, size_t lda);
204     template<class Field>
205     void cyclic_shift_row(const Field& F, typename Field::Element_ptr A, size_t m, size_t n, size_t lda);
206     template<class Field>
207     void cyclic_shift_col(const Field& F, typename Field::Element_ptr A, size_t m, size_t n, size_t lda);
208     /* \endcond */
209 
210     /**
211      * @brief Applies a permutation P to the matrix A.
212      * Apply a permutation P, stored in the LAPACK format (a sequence of transpositions)
213      * between indices ibeg and iend of P to (iend-ibeg) vectors of size M stored in A (as column for NoTrans and rows for Trans).
214      * Side==FFLAS::FflasLeft for row permutation Side==FFLAS::FflasRight for a column permutation
215      * Trans==FFLAS::FflasTrans for the inverse permutation of P
216      * @param F base field
217      * @param Side  decides if rows (FflasLeft) or columns (FflasRight) are permuted
218      * @param Trans decides if the matrix is seen as columns (FflasTrans) or rows (FflasNoTrans)
219      * @param M size of the elements to permute
220      * @param ibeg first index to consider in P
221      * @param iend last index to consider in P
222      * @param A input matrix
223      * @param lda leading dimension of A
224      * @param P permutation in LAPACK format
225      * @param psh (optional): a sequential or parallel helper, to choose between sequential or parallel execution
226      * @warning not sure the submatrix is still a permutation and the one we expect in all cases... examples for iend=2, ibeg=1 and P=[2,2,2]
227      */
228     template<class Field>
229     void applyP( const Field& F,
230                  const FFLAS::FFLAS_SIDE Side,
231                  const FFLAS::FFLAS_TRANSPOSE Trans,
232                  const size_t M, const size_t ibeg, const size_t iend,
233                  typename Field::Element_ptr A, const size_t lda, const size_t * P );
234 
235     template<class Field>
236     void applyP( const Field& F,
237                  const FFLAS::FFLAS_SIDE Side,
238                  const FFLAS::FFLAS_TRANSPOSE Trans,
239                  const size_t m, const size_t ibeg, const size_t iend,
240                  typename Field::Element_ptr A, const size_t lda, const size_t * P,
241                  const FFLAS::ParSeqHelper::Sequential seq);
242 
243     template<class Field, class Cut, class Param>
244     void applyP( const Field& F,
245                  const FFLAS::FFLAS_SIDE Side,
246                  const FFLAS::FFLAS_TRANSPOSE Trans,
247                  const size_t m, const size_t ibeg, const size_t iend,
248                  typename Field::Element_ptr A, const size_t lda, const size_t * P,
249                  const FFLAS::ParSeqHelper::Parallel<Cut, Param> par);
250 
251     /** Apply a R-monotonically increasing permutation P, to the matrix A.
252      * The permutation represented by P is defined as follows:
253      *  - the first R values of P is a LAPACK reprensentation (a sequence of transpositions)
254      *  - the remaining iend-ibeg-R values of the permutation are in a monotonically increasing progression
255      * Side==FFLAS::FflasLeft for row permutation Side==FFLAS::FflasRight for a column permutation
256      * Trans==FFLAS::FflasTrans for the inverse permutation of P
257      * @param F	base field
258      * @param Side selects if it is a row (FflasLeft) or column (FflasRight) permutation
259      * @param Trans inverse permutation (FflasTrans/NoTrans)
260      * @param M
261      * @param ibeg
262      * @param iend
263      * @param A input matrix
264      * @param lda leading dimension of A
265      * @param P LAPACK permuation
266      * @param R first values of P
267      */
268     template<class Field>
269     void
270     MonotonicApplyP (const Field& F,
271                      const FFLAS::FFLAS_SIDE Side,
272                      const FFLAS::FFLAS_TRANSPOSE Trans,
273                      const size_t M, const size_t ibeg, const size_t iend,
274                      typename Field::Element_ptr A, const size_t lda, const size_t * P, const size_t R);
275     /* \cond */
276     template<class Field>
277     void
278     MonotonicCompress (const Field& F,
279                        const FFLAS::FFLAS_SIDE Side,
280                        const size_t M,
281                        typename Field::Element_ptr A, const size_t lda, const size_t incA, const size_t * P,
282                        const size_t R, const size_t maxpiv, const size_t rowstomove,
283                        const std::vector<bool> &ispiv);
284     template<class Field>
285     void
286     MonotonicCompressMorePivots (const Field& F, const FFLAS::FFLAS_SIDE Side, const size_t M,
287                                  typename Field::Element_ptr A, const size_t lda, const size_t incA,
288                                  const size_t * MathP, const size_t R, const size_t rowstomove, const size_t lenP);
289     template<class Field>
290     void
291     MonotonicCompressCycles (const Field& F, const FFLAS::FFLAS_SIDE Side, const size_t M,
292                              typename Field::Element_ptr A, const size_t lda, const size_t incA,
293                              const size_t * MathP, const size_t lenP);
294 
295     template<class Field>
296     void
297     MonotonicExpand (const Field& F, const FFLAS::FFLAS_SIDE Side, const size_t M,
298                      typename Field::Element_ptr A, const size_t lda, const size_t incA,
299                      const size_t * MathP, const size_t R, const size_t maxpiv,
300                      const size_t rowstomove, const std::vector<bool> &ispiv);
301     /* \endcond */
302 
303 } // FFPACK permutations
304 // #include "ffpack_permutation.inl"
305 
306 namespace FFPACK { /* fgetrs, fgesv */
307 
308     /** Solve the system \f$A X = B\f$ or \f$X A = B\f$.
309      * Solving using the \c PLUQ decomposition of \p A
310      * already computed inplace with \c PLUQ (FFLAS::FflasNonUnit).
311      * Version for A square.
312      * If A is rank deficient, a solution is returned if the system is consistent,
313      * Otherwise an info is 1
314      *
315      * @param F base field
316      * @param Side Determine wheter the resolution is left (FflasLeft) or right (FflasRight) looking.
317      * @param M row dimension of \p B
318      * @param N col dimension of \p B
319      * @param R rank of \p A
320      * @param A input matrix
321      * @param lda leading dimension of \p A
322      * @param P row permutation of the \c PLUQ decomposition of \p A
323      * @param Q column permutation of the \c PLUQ decomposition of \p A
324      * @param B Right/Left hand side matrix. Initially stores \p B, finally stores the solution \p X.
325      * @param ldb leading dimension of \p B
326      * @param info Success of the computation: 0 if successfull, >0 if system is inconsistent
327      */
328     template <class Field>
329     void
330     fgetrs (const Field& F,
331             const FFLAS::FFLAS_SIDE Side,
332             const size_t M, const size_t N, const size_t R,
333             typename Field::Element_ptr A, const size_t lda,
334             const size_t *P, const size_t *Q,
335             typename Field::Element_ptr B, const size_t ldb,
336             int * info);
337 
338     /** Solve the system A X = B or X A = B.
339      * Solving using the PLUQ decomposition of A
340      * already computed inplace with PLUQ(FFLAS::FflasNonUnit).
341      * Version for A rectangular.
342      * If A is rank deficient, a solution is returned if the system is consistent,
343      * Otherwise an info is 1
344      *
345      * @param F base field
346      * @param Side Determine wheter the resolution is left (FflasLeft) or right (FflasRight) looking.
347      * @param M row dimension of A
348      * @param N col dimension of A
349      * @param NRHS number of columns (if Side = FFLAS::FflasLeft) or row (if Side = FFLAS::FflasRight) of the matrices X and B
350      * @param R rank of A
351      * @param A input matrix
352      * @param lda leading dimension of A
353      * @param P row permutation of the PLUQ decomposition of A
354      * @param Q column permutation of the PLUQ decomposition of A
355      * @param X solution matrix
356      * @param ldx leading dimension of X
357      * @param B Right/Left hand side matrix.
358      * @param ldb leading dimension of B
359      * @param info Succes of the computation: 0 if successfull, >0 if system is inconsistent
360      */
361     template <class Field>
362     typename Field::Element_ptr
363     fgetrs (const Field& F,
364             const FFLAS::FFLAS_SIDE Side,
365             const size_t M, const size_t N, const size_t NRHS, const size_t R,
366             typename Field::Element_ptr A, const size_t lda,
367             const size_t *P, const size_t *Q,
368             typename Field::Element_ptr X, const size_t ldx,
369             typename Field::ConstElement_ptr B, const size_t ldb,
370             int * info);
371 
372     /** @brief Square system solver
373      * @param F The computation domain
374      * @param Side Determine wheter the resolution is left (FflasLeft) or right (FflasRight) looking
375      * @param M row dimension of B
376      * @param N col dimension of B
377      * @param A input matrix
378      * @param lda leading dimension of A
379      * @param B Right/Left hand side matrix. Initially contains B, finally contains the solution X.
380      * @param ldb leading dimension of B
381      * @param info Success of the computation: 0 if successfull, >0 if system is inconsistent
382      * @return the rank of the system
383      *
384      * Solve the system A X = B or X A = B.
385      * Version for A square.
386      * If A is rank deficient, a solution is returned if the system is consistent,
387      * Otherwise an info is 1
388      */
389     template <class Field>
390     size_t
391     fgesv (const Field& F,
392            const FFLAS::FFLAS_SIDE Side,
393            const size_t M, const size_t N,
394            typename Field::Element_ptr A, const size_t lda,
395            typename Field::Element_ptr B, const size_t ldb,
396            int * info);
397 
398     /**  @brief Rectangular system solver
399      * @param F The computation domain
400      * @param Side Determine wheter the resolution is left (FflasLeft) or right (FflasRight) looking
401      * @param M row dimension of A
402      * @param N col dimension of A
403      * @param NRHS number of columns (if Side = FFLAS::FflasLeft) or row (if Side = FFLAS::FflasRight) of the matrices X and B
404      * @param A input matrix
405      * @param lda leading dimension of A
406      * @param B Right/Left hand side matrix. Initially contains B, finally contains the solution X.
407      * @param ldb leading dimension of B
408      * @param X
409      * @param ldx
410      * @param info Success of the computation: 0 if successfull, >0 if system is inconsistent
411      * @return the rank of the system
412      *
413      * Solve the system A X = B or X A = B.
414      * Version for A square.
415      * If A is rank deficient, a solution is returned if the system is consistent,
416      * Otherwise an info is 1
417      */
418     template <class Field>
419     size_t
420     fgesv (const Field& F,
421            const FFLAS::FFLAS_SIDE Side,
422            const size_t M, const size_t N, const size_t NRHS,
423            typename Field::Element_ptr A, const size_t lda,
424            typename Field::Element_ptr X, const size_t ldx,
425            typename Field::ConstElement_ptr B, const size_t ldb,
426            int * info);
427 } // FFPACK fgesv, fgetrs
428 // #include "ffpack_fgesv.inl"
429 // #include "ffpack_fgetrs.inl"
430 
431 namespace FFPACK { /* ftrtr */
432 
433     /** Compute the inverse of a triangular matrix.
434      * @param F base field
435      * @param Uplo whether the matrix is upper or lower triangular
436      * @param Diag whether the matrix is unit diagonal (FflasUnit/NoUnit)
437      * @param N input matrix order
438      * @param A the input matrix
439      * @param lda leading dimension of A
440      *
441      */
442     template<class Field>
443     void
444     ftrtri (const Field& F, const FFLAS::FFLAS_UPLO Uplo, const FFLAS::FFLAS_DIAG Diag,
445             const size_t N, typename Field::Element_ptr A, const size_t lda,
446             const size_t threshold = __FFLASFFPACK_FTRTRI_THRESHOLD);
447 
448 
449     template<class Field>
450     void trinv_left( const Field& F, const size_t N, typename Field::ConstElement_ptr L, const size_t ldl,
451                      typename Field::Element_ptr X, const size_t ldx );
452 
453     /**  @brief Compute the product of two triangular matrices of opposite shape.
454      * Product UL or LU of the upper, resp lower triangular matrices U and L
455      * stored one above the other in the square matrix A.
456      * @param F base field
457      * @param Side set to FflasLeft to compute the product UL, FflasRight to compute LU
458      * @param diag whether the matrix U is unit diagonal (FflasUnit/NoUnit)
459      * @param N input matrix order
460      * @param A the input matrix
461      * @param lda leading dimension of A
462      *
463      */
464     template<class Field>
465     void
466     ftrtrm (const Field& F, const FFLAS::FFLAS_SIDE side, const FFLAS::FFLAS_DIAG diag,
467             const size_t N,	typename Field::Element_ptr A, const size_t lda);
468 
469     /** @brief Solve a triangular system with a triangular right hand side of the same shape.
470      * @param F base field
471      * @param Side set to FflasLeft to compute U1^-1*U2 or L1^-1*L2, FflasRight to compute U1*U2^-1 or L1*L2^-1
472      * @param Uplo whether the matrix A is upper or lower triangular
473      * @param diag1 whether the matrix U1 or L2 is unit diagonal (FflasUnit/NoUnit)
474      * @param diag2 whether the matrix U2 or L2 is unit diagonal (FflasUnit/NoUnit)
475      * @param N order of the input matrices
476      * @param A the input matrix to be inverted (U1 or L1)
477      * @param lda leading dimension of A
478      * @param B the input right hand side (U2 or L2)
479      * @param ldb leading dimension of B
480      */
481     template<class Field>
482     void
483     ftrstr (const Field& F, const FFLAS::FFLAS_SIDE side, const FFLAS::FFLAS_UPLO Uplo,
484             const FFLAS::FFLAS_DIAG diagA, const FFLAS::FFLAS_DIAG diagB, const size_t N,
485             typename Field::ConstElement_ptr A, const size_t lda,
486             typename Field::Element_ptr B, const size_t ldb, const size_t threshold=__FFLASFFPACK_FTRSTR_THRESHOLD);
487 
488     /** @brief Solve a triangular system in a symmetric sum: find B upper/lower triangular such that A^T B + B^T A = C
489      * where C is symmetric. C is overwritten by B.
490      * @param F base field
491      * @param Side set to FflasLeft to compute U1^-1*U2 or L1^-1*L2, FflasRight to compute U1*U2^-1 or L1*L2^-1
492      * @param Uplo whether the matrix A is upper or lower triangular
493      * @param diagA whether the matrix A is unit diagonal (FflasUnit/NoUnit)
494      * @param N order of the input matrices
495      * @param A the input matrix
496      * @param lda leading dimension of A
497      * @param [inout] B the input right hand side where the output is written
498      * @param ldb leading dimension of B
499      */
500     template<class Field>
501     void
502     ftrssyr2k (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
503                const FFLAS::FFLAS_DIAG diagA, const size_t N,
504                typename Field::ConstElement_ptr A, const size_t lda,
505                typename Field::Element_ptr B, const size_t ldb, const size_t threshold=__FFLASFFPACK_FTRSSYR2K_THRESHOLD);
506 
507 } // FFPACK ftrtr
508 // #include "ffpack_ftrtr.inl"
509 
510 namespace FFPACK {
511 
512     /* LDLT or UTDU factorizations */
513 
514     /** @brief Triangular factorization of symmetric matrices
515      * @param F The computation domain
516      * @param UpLo Determine wheter to store the upper (FflasUpper) or lower (FflasLower) triangular factor
517      * @param N order of the matrix A
518      * @param [inout] A input matrix
519      * @param lda leading dimension of A
520      * @return false if the \p A does not have generic rank profile, making the computation fail.
521      *
522      * Compute the a triangular factorization of the matrix A: \f$ A = L \times D \times  L^T\f$ if UpLo = FflasLower or
523      * \f$ A = U^T \times D \times  U\f$ otherwise. \p D is a diagonal matrix. The matrices \p L and \p U are unit
524      * diagonal lower (resp. upper) triangular and overwrite the input matrix \p A.
525      * The matrix \p D is stored on the diagonal of \p A, as the diagonal of \p L or \p U is known to be all ones.
526      * If A does not have generic rank profile, the LDLT or UTDU factorizations is not defined, and the algorithm
527      * returns false.
528      */
529     template <class Field>
530     bool fsytrf (const Field& F, const FFLAS::FFLAS_UPLO UpLo, const size_t N,
531                  typename Field::Element_ptr A, const size_t lda,
532                  const size_t threshold = __FFLASFFPACK_FSYTRF_THRESHOLD);
533 
534     template <class Field>
535     bool fsytrf (const Field& F, const FFLAS::FFLAS_UPLO UpLo, const size_t N,
536                  typename Field::Element_ptr A, const size_t lda,
537                  const FFLAS::ParSeqHelper::Sequential seq,
538                  const size_t threshold = __FFLASFFPACK_FSYTRF_THRESHOLD);
539 
540     template <class Field, class Cut, class Param>
541     bool fsytrf (const Field& F, const FFLAS::FFLAS_UPLO UpLo, const size_t N,
542                  typename Field::Element_ptr A, const size_t lda,
543                  const FFLAS::ParSeqHelper::Parallel<Cut,Param> par,
544                  const size_t threshold = __FFLASFFPACK_FSYTRF_THRESHOLD);
545 
546     /* LDLT or UTDU factorizations */
547 
548     /** @brief Triangular factorization of symmetric matrices
549      * @param F The computation domain
550      * @param UpLo Determine wheter to store the upper (FflasUpper) or lower (FflasLower) triangular factor
551      * @param N order of the matrix A
552      * @param [inout] A input matrix
553      * @param [inout] D
554      * @param lda leading dimension of A
555      * @return false if the \p A does not have generic rank profile, making the computation fail.
556      *
557      * Compute the a triangular factorization of the matrix A: \f$ A = L \times Dinv \times  L^T\f$ if UpLo = FflasLower
558      * or \f$ A = U^T \times D \times  U\f$ otherwise. \p D is a diagonal matrix. The matrices \p L and \p U are
559      * lower (resp. upper) triangular and overwrite the input matrix \p A.
560      * The matrix \p D need to be stored separately, as the diagonal of \p L or \p U are not unit.
561      * If A does not have generic rank profile, the LDLT or UTDU factorizations is not defined, and the algorithm
562      * returns false.
563      */
564     template <class Field>
565     bool fsytrf_nonunit (const Field& F, const FFLAS::FFLAS_UPLO UpLo, const size_t N,
566                          typename Field::Element_ptr A, const size_t lda,
567                          typename Field::Element_ptr D, const size_t incD,
568                          const size_t threshold = __FFLASFFPACK_FSYTRF_THRESHOLD);
569     /* PLUQ */
570 
571     /** @brief Compute a PLUQ factorization of the given matrix.
572      * Return its rank.
573      * The permutations P and Q are represented
574      * using LAPACK's convention.
575      * @param F base field
576      * @param Diag   whether U should have a unit diagonal (FflasUnit) or not (FflasNoUnit)
577      * @param M matrix row dimension
578      * @param N matrix column dimension
579      * @param A input matrix
580      * @param lda leading dimension of \p A
581      * @param P the row permutation
582      * @param Q the column permutation
583 
584      * @return the rank of \p A
585      * @bib
586      * - Dumas J-G.,  Pernet C., and Sultan Z. <i>\c Simultaneous computation of the row and column rank profiles </i>, ISSAC'13, 2013
587      * .
588      */
589     template<class Field>
590     size_t PLUQ (const Field& F, const FFLAS::FFLAS_DIAG Diag,
591                  const size_t M, const size_t N,
592                  typename Field::Element_ptr A, const size_t lda,
593                  size_t*P, size_t *Q);
594 
595     template<class Field>
596     size_t pPLUQ (const Field& F, const FFLAS::FFLAS_DIAG Diag,
597                  const size_t M, const size_t N,
598                  typename Field::Element_ptr A, const size_t lda,
599                  size_t*P, size_t *Q);
600 
601     template<class Field>
602     size_t PLUQ (const Field& F, const FFLAS::FFLAS_DIAG Diag,
603                  const size_t M, const size_t N,
604                  typename Field::Element_ptr A, const size_t lda,
605                  size_t*P, size_t *Q, const FFLAS::ParSeqHelper::Sequential& PSHelper,
606                  size_t BCThreshold = __FFLASFFPACK_PLUQ_THRESHOLD);
607 
608     template<class Field, class Cut, class Param>
609     size_t PLUQ (const Field& F, const FFLAS::FFLAS_DIAG Diag,
610                  const size_t M, const size_t N,
611                  typename Field::Element_ptr A, const size_t lda,
612                  size_t*P, size_t *Q, const FFLAS::ParSeqHelper::Parallel<Cut,Param>& PSHelper);
613 
614 } // FFPACK PLUQ
615 // #include "ffpack_pluq.inl"
616 
617 namespace FFPACK { /* ludivine */
618 
619     /** @brief Compute the CUP or PLE factorization of the given matrix.
620      * Using
621      * a block algorithm and return its rank.
622      * The permutations P and Q are represented
623      * using LAPACK's convention.
624      * @param F base field
625      * @param Diag  whether the transformation matrix (U of the CUP, L of the PLE) should have a unit diagonal (FflasUnit)
626      * or not (FflasNoUnit)
627      * @param trans whether to compute the CUP decomposition (FflasNoTrans) or the PLE decomposition (FflasTrans)
628      * @param M matrix row dimension
629      * @param N matrix column dimension
630      * @param A input matrix
631      * @param lda leading dimension of \p A
632      * @param P the factor of CUP or PLE
633      * @param Q a permutation indicating the pivot position in the echelon form C or E in its first r positions
634      * @param LuTag flag for setting the earling termination if the matrix
635      * is singular
636      * @param cutoff threshold to basecase
637      *
638      * @return the rank of \p A
639      * @bib
640      * - Jeannerod C-P, Pernet, C. and Storjohann, A. <i>\c Rank-profile revealing Gaussian elimination and the CUP matrix decomposition  </i>, J. of Symbolic Comp., 2013
641      * - Pernet C, Brassel M <i>\c LUdivine, une divine factorisation \c LU</i>, 2002
642      * .
643      */
644     template <class Field>
645     size_t
646     LUdivine (const Field& F, const FFLAS::FFLAS_DIAG Diag,  const FFLAS::FFLAS_TRANSPOSE trans,
647               const size_t M, const size_t N,
648               typename Field::Element_ptr A, const size_t lda,
649               size_t* P, size_t* Qt,
650               const FFPACK_LU_TAG LuTag = FfpackSlabRecursive,
651               const size_t cutoff=__FFLASFFPACK_LUDIVINE_THRESHOLD);
652 
653     /* \cond */
654     template<class Element>
655     class callLUdivine_small;
656 
657     //! LUdivine small case
658     template <class Field>
659     size_t
660     LUdivine_small (const Field& F, const FFLAS::FFLAS_DIAG Diag,  const FFLAS::FFLAS_TRANSPOSE trans,
661                     const size_t M, const size_t N,
662                     typename Field::Element_ptr A, const size_t lda,
663                     size_t* P, size_t* Q,
664                     const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
665 
666     //! LUdivine gauss
667     template <class Field>
668     size_t
669     LUdivine_gauss (const Field& F, const FFLAS::FFLAS_DIAG Diag,
670                     const size_t M, const size_t N,
671                     typename Field::Element_ptr A, const size_t lda,
672                     size_t* P, size_t* Q,
673                     const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
674 
675     /* \endcond */
676     namespace Protected {
677 
678 
679 
680         //---------------------------------------------------------------------
681         // LUdivine_construct: (Specialisation of LUdivine)
682         // LUP factorisation of X, the Krylov base matrix of A^t and v, in A.
683         // X contains the nRowX first vectors v, vA, .., vA^{nRowX-1}
684         // A contains the LUP factorisation of the nUsedRowX first row of X.
685         // When all rows of X have been factorized in A, and rank is full,
686         // then X is updated by the following scheme: X <= ( X; X.B ), where
687         // B = A^2^i.
688         // This enables to make use of Matrix multiplication, and stop computing
689         // Krylov vector, when the rank is not longer full.
690         // P is the permutation matrix stored in an array of indexes
691         //---------------------------------------------------------------------
692 
693         template <class Field>
694         size_t
695         LUdivine_construct( const Field& F, const FFLAS::FFLAS_DIAG Diag,
696                             const size_t M, const size_t N,
697                             typename Field::ConstElement_ptr A, const size_t lda,
698                             typename Field::Element_ptr X, const size_t ldx,
699                             typename Field::Element_ptr u, const size_t incu, size_t* P,
700                             bool computeX, const FFPACK_MINPOLY_TAG MinTag= FfpackDense
701                             , const size_t kg_mc =0
702                             , const size_t kg_mb =0
703                             , const size_t kg_j  =0);
704 
705     } // Protected
706 
707 } //FFPACK ludivine, turbo
708 // #include "ffpack_ludivine.inl"
709 
710 namespace FFPACK { /* echelon */
711     /*****************/
712     /* ECHELON FORMS */
713     /*****************/
714 
715     /** Compute the Column Echelon form of the input matrix in-place.
716      *
717      * If LuTag == FfpackTileRecursive, then after the computation A = [ M \ V ]
718      * such that AU = C is a column echelon decomposition of A,
719      * with U = P^T [   V    ] and C = M + Q [ Ir ]
720      *              [ 0 In-r ]               [ 0  ]
721      * If LuTag == FfpackTileRecursive then A = [ N \ V ] such that the same holds with M = Q N
722      *
723      * Qt = Q^T
724      * If transform=false, the matrix V is not computed.
725      * See also test-colechelon for an example of use
726      * @param F base field
727      * @param M number of rows
728      * @param N number of columns
729      * @param[in] A input matrix
730      * @param lda leading dimension of A
731      * @param P the column permutation
732      * @param Qt the row position of the pivots in the echelon form
733      * @param transform decides whether V is computed
734      * @param LuTag chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
735      */
736     template <class Field>
737     size_t
738     ColumnEchelonForm (const Field& F, const size_t M, const size_t N,
739                        typename Field::Element_ptr A, const size_t lda,
740                        size_t* P, size_t* Qt, bool transform=false,
741                        const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
742 
743     template <class Field>
744     size_t
745     pColumnEchelonForm (const Field& F, const size_t M, const size_t N,
746                         typename Field::Element_ptr A, const size_t lda,
747                         size_t* P, size_t* Qt, bool transform=false,
748                         size_t numthreads = 0, const FFPACK_LU_TAG LuTag=FfpackTileRecursive);
749 
750     template <class Field, class PSHelper>
751     size_t
752     ColumnEchelonForm (const Field& F, const size_t M, const size_t N,
753                               typename Field::Element_ptr A, const size_t lda,
754                               size_t* P, size_t* Qt, const bool transform,
755                               const FFPACK_LU_TAG LuTag, const PSHelper& psH);
756 
757 
758 
759     /**  Compute the Row Echelon form of the input matrix in-place.
760      *
761      * If LuTag == FfpackTileRecursive, then after the computation A = [ L \ M ]
762      * such that X A = R is a row echelon decomposition of A,
763      * with X =  [ L  0   ] P  and R = M + [Ir 0] Q^T
764      *           [    In-r]
765      * If LuTag == FfpackTileRecursive then A = [ L \ N ] such that the same holds with M =  N Q^T
766      * Qt = Q^T
767      * If transform=false, the matrix L is not computed.
768      * See also test-rowechelon for an example of use
769      * @param F base field
770      * @param M number of rows
771      * @param N number of columns
772      * @param[in] A the input matrix
773      * @param lda leading dimension of A
774      * @param P the row permutation
775      * @param Qt the column position of the pivots in the echelon form
776      * @param transform decides whether L is computed
777      * @param LuTag chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
778      */
779     template <class Field>
780     size_t
781     RowEchelonForm (const Field& F, const size_t M, const size_t N,
782                     typename Field::Element_ptr A, const size_t lda,
783                     size_t* P, size_t* Qt, const bool transform=false,
784                     const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
785 
786     template <class Field>
787     size_t
788     pRowEchelonForm (const Field& F, const size_t M, const size_t N,
789                      typename Field::Element_ptr A, const size_t lda,
790                      size_t* P, size_t* Qt, const bool transform=false,
791                      size_t numthreads = 0, const FFPACK_LU_TAG LuTag=FfpackTileRecursive);
792 
793     template <class Field, class PSHelper>
794     size_t
795     RowEchelonForm (const Field& F, const size_t M, const size_t N,
796                            typename Field::Element_ptr A, const size_t lda,
797                            size_t* P, size_t* Qt, const bool transform,
798                            const FFPACK_LU_TAG LuTag, const PSHelper& psH);
799 
800 
801     /** Compute the Reduced Column Echelon form of the input matrix in-place.
802      *
803      * After the computation A = [ V   ] such that AX = R is a reduced col echelon
804      *                           [ M 0 ]
805      * decomposition of A, where X = P^T [ V      ] and R = Q [ Ir   ]
806      *                                   [ 0 In-r ]           [ M  0 ]
807      * Qt = Q^T
808      * If transform=false, the matrix X is not computed and the matrix A = R
809      *
810      * @param F base field
811      * @param M number of rows
812      * @param N number of columns
813      * @param[in] A input matrix
814      * @param lda leading dimension of A
815      * @param P the column permutation
816      * @param Qt the row position of the pivots in the echelon form
817      * @param transform decides whether X is computed
818      * @param LuTag chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
819      */
820     template <class Field>
821     size_t
822     ReducedColumnEchelonForm (const Field& F, const size_t M, const size_t N,
823                               typename Field::Element_ptr A, const size_t lda,
824                               size_t* P, size_t* Qt, const bool transform = false,
825                               const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
826 
827     template <class Field>
828     size_t
829     pReducedColumnEchelonForm (const Field& F, const size_t M, const size_t N,
830                                typename Field::Element_ptr A, const size_t lda,
831                                size_t* P, size_t* Qt, const bool transform = false,
832                                size_t numthreads = 0, const FFPACK_LU_TAG LuTag=FfpackTileRecursive);
833 
834     template <class Field, class PSHelper>
835     size_t
836     ReducedColumnEchelonForm (const Field& F, const size_t M, const size_t N,
837                               typename Field::Element_ptr A, const size_t lda,
838                               size_t* P, size_t* Qt, const bool transform,
839                               const FFPACK_LU_TAG LuTag, const PSHelper& psH);
840 
841 
842 
843     /** Compute the Reduced Row Echelon form of the input matrix in-place.
844      *
845      * After the computation A = [ V1 M ] such that X A = R is a reduced row echelon
846      *                           [ V2 0 ]
847      * decomposition of A, where X =  [ V1  0   ] P and R =  [ Ir M  ] Q^T
848      *                                [ V2 In-r ]            [ 0     ]
849      * Qt = Q^T
850      * If transform=false, the matrix X is not computed and the matrix A = R
851      * @param F base field
852      * @param M number of rows
853      * @param N number of columns
854      * @param[in] A input matrix
855      * @param lda leading dimension of A
856      * @param P the row permutation
857      * @param Qt the column position of the pivots in the echelon form
858      * @param transform decides whether X is computed
859      * @param LuTag chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
860      */
861     template <class Field>
862     size_t
863     ReducedRowEchelonForm (const Field& F, const size_t M, const size_t N,
864                            typename Field::Element_ptr A, const size_t lda,
865                            size_t* P, size_t* Qt, const bool transform = false,
866                            const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
867 
868     template <class Field>
869     size_t
870     pReducedRowEchelonForm (const Field& F, const size_t M, const size_t N,
871                            typename Field::Element_ptr A, const size_t lda,
872                            size_t* P, size_t* Qt, const bool transform = false,
873                            size_t numthreads = 0, const FFPACK_LU_TAG LuTag=FfpackTileRecursive);
874 
875     template <class Field, class PSHelper>
876     size_t
877     ReducedRowEchelonForm (const Field& F, const size_t M, const size_t N,
878                            typename Field::Element_ptr A, const size_t lda,
879                            size_t* P, size_t* Qt, const bool transform,
880                            const FFPACK_LU_TAG LuTag, const PSHelper& psH);
881 
882 
883 
884 
885     namespace Protected {
886         /**  @brief Gauss-Jordan algorithm computing the Reduced Row echelon form and its transform matrix.
887          * @bib
888          *  - Algorithm 2.8 of A. Storjohann Thesis 2000,
889          *  - Algorithm 11 of Jeannerod C-P., Pernet, C. and Storjohann, A. <i>\c Rank-profile revealing Gaussian elimination and the CUP matrix decomposition  </i>, J. of Symbolic Comp., 2013
890          * @param M row dimension of A
891          * @param N column dimension of A
892          * @param [inout] A an m x n matrix
893          * @param lda leading dimension of A
894          * @param P row permutation
895          * @param Q column permutation
896          * @param LuTag set the base case to a Tile (FfpackGaussJordanTile)  or Slab (FfpackGaussJordanSlab) recursive RedEchelon
897          */
898         template <class Field>
899         size_t
900         GaussJordan (const Field& F, const size_t M, const size_t N,
901                      typename Field::Element_ptr A, const size_t lda,
902                      const size_t colbeg, const size_t rowbeg, const size_t colsize,
903                      size_t* P, size_t* Q, const FFPACK::FFPACK_LU_TAG LuTag);
904     } // Protected
905 } // FFPACK
906 // #include "ffpack_echelonforms.inl"
907 
908 namespace FFPACK { /* invert */
909     /*****************/
910     /*   INVERSION   */
911     /*****************/
912     /**  @brief Invert the given matrix in place
913      * or computes its nullity if it is singular.
914      *
915      * An inplace \f$2n^3\f$ algorithm is used.
916      * @param F The computation domain
917      * @param M order of the matrix
918      * @param [in,out] A input matrix (\f$M \times M\f$)
919      * @param lda leading dimension of A
920      * @param nullity dimension of the kernel of A
921      * @return pointer to \f$A\f$ and \f$A \gets A^{-1}\f$
922      */
923     template <class Field>
924     typename Field::Element_ptr
925     Invert (const Field& F, const size_t M,
926             typename Field::Element_ptr A, const size_t lda,
927             int& nullity);
928 
929     /** @brief Invert the given matrix
930      * or computes its nullity if it is singular.
931      *
932      * @pre \p X is preallocated and should be large enough to store the
933      * \f$ m \times m\f$ matrix \p A.
934      *
935      * @param F The computation domain
936      * @param M order of the matrix
937      * @param [in] A input matrix (\f$M \times M\f$)
938      * @param lda leading dimension of \p A
939      * @param [out] X this is the inverse of \p A if \p A is invertible
940      * (non \c NULL and \f$ \mathtt{nullity} = 0\f$). It is untouched
941      * otherwise.
942      * @param ldx leading dimension of \p X
943      * @param nullity dimension of the kernel of \p A
944      * @return pointer to \f$X = A^{-1}\f$
945      */
946     template <class Field>
947     typename Field::Element_ptr
948     Invert (const Field& F, const size_t M,
949             typename Field::ConstElement_ptr A, const size_t lda,
950             typename Field::Element_ptr X, const size_t ldx,
951             int& nullity);
952 
953     /** @brief Invert the given matrix or computes its nullity if it is singular.
954      *
955      * An \f$2n^3f\f$ algorithm is used.
956      * This routine can be \% faster than FFPACK::Invert but is not totally inplace.
957      *
958      * @pre \p X is preallocated and should be large enough to store the
959      * \f$ m \times m\f$ matrix \p A.
960      *
961      * @warning A is overwritten here !
962      * @bug not tested.
963      * @param F the computation domain
964      * @param M order of the matrix
965      * @param [in,out] A input matrix (\f$M \times M\f$). On output, \p A
966      * is modified and represents a "psycological" factorisation \c LU.
967      * @param lda leading dimension of A
968      * @param [out] X this is the inverse of \p A if \p A is invertible
969      * (non \c NULL and \f$ \mathtt{nullity} = 0\f$). It is untouched
970      * otherwise.
971      * @param ldx leading dimension of \p X
972      * @param nullity dimension of the kernel of \p A
973      * @return pointer to \f$X = A^{-1}\f$
974      */
975     template <class Field>
976     typename Field::Element_ptr
977     Invert2( const Field& F, const size_t M,
978              typename Field::Element_ptr A, const size_t lda,
979              typename Field::Element_ptr X, const size_t ldx,
980              int& nullity);
981 
982 } // FFPACK invert
983 // #include "ffpack_invert.inl"
984 
985 namespace FFPACK { /* charpoly */
986     /*****************************/
987     /* CHARACTERISTIC POLYNOMIAL */
988     /*****************************/
989 
990 
991     /**
992      * @brief Compute the characteristic polynomial of the matrix A.
993      * @param R the polynomial ring of charp (contains the base field)
994      * @param [out] charp the characteristic polynomial of \p as a list of factors
995      * @param N order of the matrix \p A
996      * @param [in] A the input matrix (\f$ N \times N\f$) (could be overwritten in some algorithmic variants)
997      * @param lda leading dimension of \p A
998      * @param CharpTag the algorithmic variant
999      * @param G a random iterator (required for the randomized variants LUKrylov and ArithProg)
1000      */
1001     template <class PolRing>
1002     inline std::list<typename PolRing::Element>&
1003     CharPoly (const PolRing& R, std::list<typename PolRing::Element>& charp, const size_t N,
1004               typename PolRing::Domain_t::Element_ptr A, const size_t lda,
1005               typename PolRing::Domain_t::RandIter& G,
1006               const FFPACK_CHARPOLY_TAG CharpTag= FfpackAuto,
1007               const size_t degree = __FFLASFFPACK_ARITHPROG_THRESHOLD);
1008 
1009     /**
1010      * @brief Compute the characteristic polynomial of the matrix A.
1011      * @param R the polynomial ring of charp (contains the base field)
1012      * @param [out] charp the characteristic polynomial of \p as a single polynomial
1013      * @param N order of the matrix \p A
1014      * @param [in] A the input matrix (\f$ N \times N\f$) (could be overwritten in some algorithmic variants)
1015      * @param lda leading dimension of \p A
1016      * @param CharpTag the algorithmic variant
1017      * @param G a random iterator (required for the randomized variants LUKrylov and ArithProg)
1018      */
1019     template <class PolRing>
1020     inline typename PolRing::Element&
1021     CharPoly (const PolRing& R, typename PolRing::Element& charp, const size_t N,
1022               typename PolRing::Domain_t::Element_ptr A, const size_t lda,
1023               typename PolRing::Domain_t::RandIter& G,
1024               const FFPACK_CHARPOLY_TAG CharpTag= FfpackAuto,
1025               const size_t degree = __FFLASFFPACK_ARITHPROG_THRESHOLD);
1026 
1027     /**
1028      * @brief Compute the characteristic polynomial of the matrix A.
1029      * @param R the polynomial ring of charp (contains the base field)
1030      * @param [out] charp the characteristic polynomial of \p as a single polynomial
1031      * @param N order of the matrix \p A
1032      * @param [in] A the input matrix (\f$ N \times N\f$) (could be overwritten in some algorithmic variants)
1033      * @param lda leading dimension of \p A
1034      * @param CharpTag the algorithmic variant
1035      */
1036     template <class PolRing>
1037     inline typename PolRing::Element&
1038     CharPoly (const PolRing& R, typename PolRing::Element& charp, const size_t N,
1039               typename PolRing::Domain_t::Element_ptr A, const size_t lda,
1040               const FFPACK_CHARPOLY_TAG CharpTag= FfpackAuto,
1041               const size_t degree = __FFLASFFPACK_ARITHPROG_THRESHOLD){
1042         typename PolRing::Domain_t::RandIter G(R.getdomain());
1043         return CharPoly (R, charp, N, A, lda, G, CharpTag, degree);
1044     }
1045 
1046 
1047     namespace Protected {
1048         template <class Field, class Polynomial>
1049         std::list<Polynomial>&
1050         KellerGehrig( const Field& F, std::list<Polynomial>& charp, const size_t N,
1051                       typename Field::ConstElement_ptr A, const size_t lda );
1052 
1053         template <class Field, class Polynomial>
1054         int
1055         KGFast ( const Field& F, std::list<Polynomial>& charp, const size_t N,
1056                  typename Field::Element_ptr A, const size_t lda,
1057                  size_t * kg_mc, size_t* kg_mb, size_t* kg_j );
1058 
1059         template <class Field, class Polynomial>
1060         std::list<Polynomial>&
1061         KGFast_generalized (const Field& F, std::list<Polynomial>& charp,
1062                             const size_t N,
1063                             typename Field::Element_ptr A, const size_t lda);
1064 
1065 
1066         template<class Field>
1067         void
1068         fgemv_kgf( const Field& F,  const size_t N,
1069                    typename Field::ConstElement_ptr A, const size_t lda,
1070                    typename Field::ConstElement_ptr X, const size_t incX,
1071                    typename Field::Element_ptr Y, const size_t incY,
1072                    const size_t kg_mc, const size_t kg_mb, const size_t kg_j );
1073 
1074         template <class Field, class Polynomial, class RandIter>
1075         std::list<Polynomial>&
1076         LUKrylov( const Field& F, std::list<Polynomial>& charp, const size_t N,
1077                   typename Field::Element_ptr A, const size_t lda,
1078                   typename Field::Element_ptr U, const size_t ldu, RandIter& G);
1079 
1080         template <class Field, class Polynomial>
1081         std::list<Polynomial>&
1082         Danilevski (const Field& F, std::list<Polynomial>& charp,
1083                     const size_t N, typename Field::Element_ptr A, const size_t lda);
1084 
1085 
1086         template <class PolRing>
1087         inline void
1088         RandomKrylovPrecond (const PolRing& PR, std::list<typename PolRing::Element>& completedFactors, const size_t N,
1089                              typename PolRing::Domain_t::Element_ptr A, const size_t lda,
1090                              size_t& Nb, typename PolRing::Domain_t::Element_ptr& B, size_t& ldb,
1091                              typename PolRing::Domain_t::RandIter& g, const size_t degree=__FFLASFFPACK_ARITHPROG_THRESHOLD);
1092 
1093         template <class PolRing>
1094         inline std::list<typename PolRing::Element>&
1095         ArithProg (const PolRing& PR, std::list<typename PolRing::Element>& frobeniusForm,
1096                    const size_t N, typename PolRing::Domain_t::Element_ptr A, const size_t lda,
1097                    const size_t degree);
1098 
1099         template <class Field, class Polynomial>
1100         std::list<Polynomial>&
1101         LUKrylov_KGFast( const Field& F, std::list<Polynomial>& charp, const size_t N,
1102                          typename Field::Element_ptr A, const size_t lda,
1103                          typename Field::Element_ptr X, const size_t ldx);
1104     } // Protected
1105 } // FFPACK charpoly
1106 // #include "ffpack_charpoly_kglu.inl"
1107 // #include "ffpack_charpoly_kgfast.inl"
1108 // #include "ffpack_charpoly_kgfastgeneralized.inl"
1109 // #include "ffpack_charpoly_danilevski.inl"
1110 // #include "ffpack_charpoly.inl"
1111 
1112 namespace FFPACK { /* frobenius, charpoly */
1113 
1114 
1115 } // FFPACK frobenius
1116 // #include "ffpack_frobenius.inl"
1117 
1118 namespace FFPACK { /* minpoly */
1119 
1120 
1121     /**********************/
1122     /* MINIMAL POLYNOMIAL */
1123     /**********************/
1124 
1125     /**
1126      * @brief Compute the minimal polynomial of the matrix A.
1127      * The algorithm is randomized probabilistic, and computes the minimal polynomial of
1128      * the Krylov iterates of a random vector: (v, Av, .., A^kv)
1129      * @param F the base field
1130      * @param [out] minP the minimal polynomial of \p A
1131      * @param N order of the matrix \p A
1132      * @param [in] A the input matrix (\f$ N \times N\f$)
1133      * @param lda leading dimension of \p A
1134      */
1135     template <class Field, class Polynomial>
1136     Polynomial&
1137     MinPoly (const Field& F, Polynomial& minP, const size_t N,
1138              typename Field::ConstElement_ptr A, const size_t lda);
1139 
1140     /**
1141      * @brief Compute the minimal polynomial of the matrix A.
1142      * The algorithm is randomized probabilistic, and computes the minimal polynomial of
1143      * the Krylov iterates of a random vector: (v, Av, .., A^kv)
1144      * @param F the base field
1145      * @param [out] minP the minimal polynomial of \p A
1146      * @param N order of the matrix \p A
1147      * @param [in] A the input matrix (\f$ N \times N\f$)
1148      * @param lda leading dimension of \p A
1149      * @param G a random iterator
1150      */
1151     template <class Field, class Polynomial, class RandIter>
1152     Polynomial&
1153     MinPoly (const Field& F, Polynomial& minP, const size_t N,
1154              typename Field::ConstElement_ptr A, const size_t lda, RandIter& G);
1155 
1156     /**
1157      * @brief Compute the minimal polynomial of the matrix A and a vector v, namely the first linear dependency relation in the Krylov basis \f$(v,Av, ..., A^Nv)\f$.
1158      * @param F the base field
1159      * @param [out] minP the minimal polynomial of \p A and v
1160      * @param N order of the matrix \p A
1161      * @param [in] A the input matrix (\f$ N \times N\f$)
1162      * @param lda leading dimension of \p A
1163      * @param K an \f$ N \times (N+1)\f$ matrix containing the vector v on its first row
1164      * @param ldk leading dimension of \p K
1165      * @param P [out] (optional) the permutation used in the elimination of the Krylov matrix \p K
1166      */
1167     template <class Field, class Polynomial>
1168     Polynomial&
1169     MatVecMinPoly (const Field& F, Polynomial& minP, const size_t N,
1170                    typename Field::ConstElement_ptr A, const size_t lda,
1171                    typename Field::ConstElement_ptr v, const size_t incv);
1172 
1173     namespace Protected{
1174         template <class Field, class Polynomial>
1175         Polynomial&
1176         MatVecMinPoly (const Field& F, Polynomial& minP, const size_t N,
1177                        typename Field::ConstElement_ptr A, const size_t lda,
1178                        typename Field::Element_ptr v, const size_t incv,
1179                        typename Field::Element_ptr K, const size_t ldk,
1180                        size_t * P);
1181 
1182         template <class Field, class Polynomial>
1183         Polynomial&
1184         Hybrid_KGF_LUK_MinPoly (const Field& F, Polynomial& minP, const size_t N,
1185                                 typename Field::ConstElement_ptr A, const size_t lda,
1186                                 typename Field::Element_ptr X, const size_t ldx, size_t* P,
1187                                 const FFPACK_MINPOLY_TAG MinTag= FFPACK::FfpackDense,
1188                                 const size_t kg_mc=0, const size_t kg_mb=0, const size_t kg_j=0);
1189     } // Protected
1190 } // FFPACK minpoly
1191 // #include "ffpack_minpoly.inl"
1192 
1193 namespace FFPACK { /* Krylov Elim */
1194 
1195     /* \cond */
1196     template <class Field>
1197     size_t KrylovElim( const Field& F, const size_t M, const size_t N,
1198                        typename Field::Element_ptr A, const size_t lda, size_t*P,
1199                        size_t *Q, const size_t deg, size_t *iterates, size_t * inviterates, const size_t maxit,size_t virt);
1200 
1201     template <class Field>
1202     size_t  SpecRankProfile (const Field& F, const size_t M, const size_t N,
1203                              typename Field::Element_ptr A, const size_t lda, const size_t deg, size_t *rankProfile);
1204     /* \endcond */
1205 
1206 } // FFPACK KrylovElim
1207 // #include "ffpack_krylovelim.inl"
1208 
1209 namespace FFPACK { /* Solutions */
1210     /********/
1211     /* RANK */
1212     /********/
1213 
1214 
1215 
1216     /** Computes the rank of the given matrix using a PLUQ factorization.
1217      * The input matrix is modified.
1218      * @param F base field
1219      * @param M row dimension of the matrix
1220      * @param N column dimension of the matrix
1221      * @param [in] A input matrix
1222      * @param lda leading dimension of A
1223      * @param psH (optional) a ParSeqHelper to choose between sequential and parallel execution
1224      */
1225 
1226     template <class Field>
1227     size_t
1228     Rank( const Field& F, const size_t M, const size_t N,
1229           typename Field::Element_ptr A, const size_t lda);
1230 
1231     template <class Field>
1232     size_t
1233     pRank (const Field& F, const size_t M, const size_t N,
1234            typename Field::Element_ptr A, const size_t lda, size_t numthreads = 0);
1235 
1236     template <class Field, class PSHelper>
1237     size_t
1238     Rank( const Field& F, const size_t M, const size_t N,
1239           typename Field::Element_ptr A, const size_t lda, const PSHelper& psH) ;
1240 
1241 
1242     /********/
1243     /* DET  */
1244     /********/
1245 
1246 
1247     /**  Returns true if the given matrix is singular.
1248      * The method is a block elimination with early termination
1249      *
1250      * using LQUP factorization  with early termination.
1251      * If <code>M != N</code>,
1252      * then the matrix is virtually padded with zeros to make it square and
1253      * it's determinant is zero.
1254      * @warning The input matrix is modified.
1255      * @param F base field
1256      * @param M row dimension of the matrix
1257      * @param N column dimension of the matrix.
1258      * @param [in,out] A input matrix
1259      * @param lda leading dimension of A
1260      */
1261     template <class Field>
1262     bool
1263     IsSingular( const Field& F, const size_t M, const size_t N,
1264                 typename Field::Element_ptr A, const size_t lda);
1265 
1266     /** @brief Returns the determinant of the given square matrix.
1267      * @details The method is a block elimination
1268      * using PLUQ factorization. The input matrix A is overwritten.
1269      * @warning The input matrix is modified.
1270      * @param F base field
1271      * @param [out] det the determinant of A
1272      * @param N the order of the square matrix A.
1273      * @param [in,out] A input matrix
1274      * @param lda leading dimension of A
1275      * @param psH (optional) a ParSeqHelper to choose between sequential and parallel execution
1276      * @param P,Q (optional) row and column permutations to be used by the PLUQ factorization. randomized checkers (see cherckes/checker_det.inl) need them for certification
1277      */
1278 
1279     template <class Field>
1280     typename Field::Element&
1281     Det (const Field& F, typename Field::Element& det, const size_t N,
1282          typename Field::Element_ptr A, const size_t lda,
1283          size_t * P = NULL, size_t * Q = NULL);
1284 
1285     template <class Field>
1286     typename Field::Element&
1287     pDet (const Field& F, typename Field::Element& det, const size_t N,
1288           typename Field::Element_ptr A, const size_t lda,
1289           size_t numthreads = 0, size_t * P = NULL, size_t * Q = NULL);
1290 
1291     template <class Field, class PSHelper>
1292     typename Field::Element&
1293     Det(const Field& F, typename Field::Element& det, const size_t N,
1294         typename Field::Element_ptr A, const size_t lda, const PSHelper& psH,
1295         size_t * P = NULL, size_t * Q = NULL);
1296 
1297     /*********/
1298     /* SOLVE */
1299     /*********/
1300 
1301 
1302     /**
1303      * @brief Solves a linear system AX = b using PLUQ factorization.
1304      * @oaram F base field
1305      * @oaram M matrix order
1306      * @param [in] A input matrix
1307      * @param lda leading dimension of A
1308      * @param [out] x output solution vector
1309      * @param incx increment of x
1310      * @param b input right hand side of the system
1311      * @param incb increment of b
1312      */
1313     template <class Field>
1314     typename Field::Element_ptr
1315     Solve( const Field& F, const size_t M,
1316            typename Field::Element_ptr A, const size_t lda,
1317            typename Field::Element_ptr x, const int incx,
1318            typename Field::ConstElement_ptr b, const int incb );
1319 
1320     template <class Field, class PSHelper>
1321     typename Field::Element_ptr
1322     Solve( const Field& F, const size_t M,
1323            typename Field::Element_ptr A, const size_t lda,
1324            typename Field::Element_ptr x, const int incx,
1325            typename Field::ConstElement_ptr b, const int incb, PSHelper& psH);
1326 
1327     template <class Field>
1328     typename Field::Element_ptr
1329     pSolve (const Field& F, const size_t M,
1330             typename Field::Element_ptr A, const size_t lda,
1331             typename Field::Element_ptr x, const int incx,
1332             typename Field::ConstElement_ptr b, const int incb, size_t numthreads = 0);
1333 
1334     //! Solve L X = B or X L = B in place.
1335     //! L is M*M if Side == FFLAS::FflasLeft and N*N if Side == FFLAS::FflasRight, B is M*N.
1336     //! Only the R non trivial column of L are stored in the M*R matrix L
1337     //! Requirement :  so that L could  be expanded in-place
1338     /* \cond */
1339     template<class Field>
1340     void
1341     solveLB( const Field& F, const FFLAS::FFLAS_SIDE Side,
1342              const size_t M, const size_t N, const size_t R,
1343              typename Field::Element_ptr L, const size_t ldl,
1344              const size_t * Q,
1345              typename Field::Element_ptr B, const size_t ldb );
1346 
1347     //! Solve L X = B in place.
1348     //! L is M*M or N*N, B is M*N.
1349     //! Only the R non trivial column of L are stored in the M*R matrix L
1350     template<class Field>
1351     void
1352     solveLB2( const Field& F, const FFLAS::FFLAS_SIDE Side,
1353               const size_t M, const size_t N, const size_t R,
1354               typename Field::Element_ptr L, const size_t ldl,
1355               const size_t * Q,
1356               typename Field::Element_ptr B, const size_t ldb );
1357     /* \endcond */
1358 
1359     /*************/
1360     /* NULLSPACE */
1361     /*************/
1362 
1363     /**  Computes a vector of the Left/Right nullspace of the matrix A.
1364      *
1365      * @param F The computation domain
1366      * @param Side decides whether it computes the left (FflasLeft) or right (FflasRight) nullspace
1367      * @param M number of rows
1368      * @param N number of columns
1369      * @param[in,out] A input matrix of dimension M x N, A is modified to its LU version
1370      * @param lda leading dimension of A
1371      * @param[out] X output vector
1372      * @param incX increment of X
1373      *
1374      */
1375     template <class Field>
1376     void RandomNullSpaceVector (const Field& F, const FFLAS::FFLAS_SIDE Side,
1377                                 const size_t M, const size_t N,
1378                                 typename Field::Element_ptr A, const size_t lda,
1379                                 typename Field::Element_ptr X, const size_t incX);
1380 
1381     /**  Computes a basis of the Left/Right nullspace of the matrix A.
1382      * return the dimension of the nullspace.
1383      *
1384      * @param F The computation domain
1385      * @param Side decides whether it computes the left (FflasLeft) or right (FflasRight) nullspace
1386      * @param M number of rows
1387      * @param N number of columns
1388      * @param[in,out] A input matrix of dimension M x N, A is modified
1389      * @param lda leading dimension of A
1390      * @param[out] NS output matrix of dimension N x NSdim (allocated here)
1391      * @param[out] ldn leading dimension of NS
1392      * @param[out] NSdim the dimension of the Nullspace (N-rank(A))
1393      *
1394      */
1395     template <class Field>
1396     size_t NullSpaceBasis (const Field& F, const FFLAS::FFLAS_SIDE Side,
1397                            const size_t M, const size_t N,
1398                            typename Field::Element_ptr A, const size_t lda,
1399                            typename Field::Element_ptr& NS, size_t& ldn,
1400                            size_t& NSdim);
1401 
1402     /*****************/
1403     /* RANK PROFILES */
1404     /*****************/
1405 
1406     /** @brief Computes the row rank profile of A.
1407      *
1408      * @param F base field
1409      * @param M number of rows
1410      * @param N number of columns
1411      * @param [in] A input matrix of dimension M x N
1412      * @param lda leading dimension of A
1413      * @param [out] rkprofile return the rank profile as an array of row indexes, of dimension r=rank(A)
1414      * @param LuTag: chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
1415      *
1416      * A is modified
1417      * rkprofile is allocated during the computation.
1418      * @returns R
1419      */
1420     template <class Field>
1421     size_t RowRankProfile (const Field& F, const size_t M, const size_t N,
1422                            typename Field::Element_ptr A, const size_t lda,
1423                            size_t* &rkprofile, const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
1424 
1425     template <class Field>
1426     size_t pRowRankProfile (const Field& F, const size_t M, const size_t N,
1427                            typename Field::Element_ptr A, const size_t lda,
1428                            size_t* &rkprofile, size_t numthreads = 0, const FFPACK_LU_TAG LuTag=FfpackTileRecursive);
1429 
1430     template <class Field, class PSHelper>
1431     size_t RowRankProfile (const Field& F, const size_t M, const size_t N,
1432                                   typename Field::Element_ptr A, const size_t lda,
1433                                   size_t* &rkprofile, const FFPACK_LU_TAG LuTag, PSHelper& psH);
1434 
1435     /**  @brief Computes the column rank profile of A.
1436      *
1437      * @param F base field
1438      * @param M number of rows
1439      * @param N number of columns
1440      * @param [in] A input matrix of dimension
1441      * @param lda leading dimension of A
1442      * @param [out] rkprofile return the rank profile as an array of row indexes, of dimension r=rank(A)
1443      * @param LuTag: chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
1444      *
1445      * A is modified
1446      * rkprofile is allocated during the computation.
1447      * @returns R
1448      */
1449     template <class Field>
1450     size_t ColumnRankProfile (const Field& F, const size_t M, const size_t N,
1451                               typename Field::Element_ptr A, const size_t lda,
1452                               size_t* &rkprofile, const FFPACK_LU_TAG LuTag=FfpackSlabRecursive);
1453 
1454     template <class Field>
1455     size_t pColumnRankProfile (const Field& F, const size_t M, const size_t N,
1456                                typename Field::Element_ptr A, const size_t lda,
1457                                size_t* &rkprofile, size_t numthreads = 0,
1458                                const FFPACK_LU_TAG LuTag=FfpackTileRecursive);
1459 
1460     template <class Field, class PSHelper>
1461     size_t ColumnRankProfile (const Field& F, const size_t M, const size_t N,
1462                               typename Field::Element_ptr A, const size_t lda,
1463                               size_t* &rkprofile, const FFPACK_LU_TAG LuTag, PSHelper& psH);
1464 
1465 
1466     /**  @brief Recovers the column/row rank profile from the permutation of an LU decomposition.
1467      *
1468      * Works with both the CUP/PLE decompositions (obtained by LUdivine) or the PLUQ decomposition.
1469      * Assumes that the output vector containing the rank profile is already allocated.
1470      * @param P the permutation carrying the rank profile information
1471      * @param N the row/col dimension for a row/column rank profile
1472      * @param R the rank of the matrix
1473      * @param [out] rkprofile return the rank profile as an array of indices
1474      * @param LuTag: chooses the elimination algorithm. SlabRecursive for LUdivine, TileRecursive for PLUQ
1475      *
1476      *
1477      */
1478     void RankProfileFromLU (const size_t* P, const size_t N, const size_t R,
1479                             size_t* rkprofile, const FFPACK_LU_TAG LuTag);
1480 
1481     /**  @brief Recovers the row and column rank profiles of any leading submatrix from the PLUQ decomposition.
1482      *
1483      * Only works with the PLUQ decomposition
1484      * Assumes that the output vectors containing the rank profiles are already allocated.
1485      *
1486      * @param P the permutation carrying the rank profile information
1487      * @param M the row dimension of the initial matrix
1488      * @param N the column dimension of the initial matrix
1489      * @param R the rank of the initial matrix
1490      * @param LSm the row dimension of the leading submatrix considered
1491      * @param LSn the column dimension of the leading submatrix considered
1492      * @param P the row permutation of the PLUQ decomposition
1493      * @param Q the column permutation of the PLUQ decomposition
1494      * @param RRP return the row rank profile of the leading submatrix
1495      * @return the rank of the LSm x LSn leading submatrix
1496      *
1497      * A is modified
1498      * @bib
1499      * - Dumas J-G., Pernet C., and Sultan Z. <i>\c Simultaneous computation of the row and column rank profiles </i>, ISSAC'13.
1500      */
1501     size_t LeadingSubmatrixRankProfiles (const size_t M, const size_t N, const size_t R,
1502                                          const size_t LSm, const size_t LSn,
1503                                          const size_t* P, const size_t* Q,
1504                                          size_t* RRP, size_t* CRP);
1505     /** RowRankProfileSubmatrixIndices.
1506      * Computes the indices of the submatrix r*r X of A whose rows correspond to
1507      * the row rank profile of A.
1508      *
1509      * @param F base field
1510      * @param M number of rows
1511      * @param N number of columns
1512      * @param [in] A input matrix of dimension
1513      * @param rowindices array of the row indices of X in A
1514      * @param colindices array of the col indices of X in A
1515      * @param lda leading dimension of A
1516      * @param[out] R list of indices
1517      *
1518      * rowindices and colindices are allocated during the computation.
1519      * A is modified
1520      * @returns R
1521      */
1522     template <class Field>
1523     size_t RowRankProfileSubmatrixIndices (const Field& F,
1524                                            const size_t M, const size_t N,
1525                                            typename Field::Element_ptr A,
1526                                            const size_t lda,
1527                                            size_t*& rowindices,
1528                                            size_t*& colindices,
1529                                            size_t& R);
1530 
1531     /** Computes the indices of the submatrix r*r X of A whose columns correspond to
1532      * the column rank profile of A.
1533      *
1534      * @param F base field
1535      * @param M number of rows
1536      * @param N number of columns
1537      * @param [in] A input matrix of dimension
1538      * @param rowindices array of the row indices of X in A
1539      * @param colindices array of the col indices of X in A
1540      * @param lda leading dimension of A
1541      * @param[out] R list of indices
1542      *
1543      * rowindices and colindices are allocated during the computation.
1544      * @warning A is modified
1545      * \return R
1546      */
1547     template <class Field>
1548     size_t ColRankProfileSubmatrixIndices (const Field& F,
1549                                            const size_t M, const size_t N,
1550                                            typename Field::Element_ptr A,
1551                                            const size_t lda,
1552                                            size_t*& rowindices,
1553                                            size_t*& colindices,
1554                                            size_t& R);
1555 
1556     /** Computes the r*r submatrix X of A, by picking the row rank profile rows of A.
1557      *
1558      * @param F base field
1559      * @param M number of rows
1560      * @param N number of columns
1561      * @param [in] A input matrix of dimension M x N
1562      * @param lda leading dimension of A
1563      * @param [out] X the output matrix
1564      * @param[out] R list of indices
1565      *
1566      * A is not modified
1567      * X is allocated during the computation.
1568      * @return R
1569      */
1570     template <class Field>
1571     size_t RowRankProfileSubmatrix (const Field& F,
1572                                     const size_t M, const size_t N,
1573                                     typename Field::Element_ptr A,
1574                                     const size_t lda,
1575                                     typename Field::Element_ptr& X, size_t& R);
1576 
1577     /** Compute the \f$ r\times r\f$ submatrix X of A, by picking the row rank profile rows of A.
1578      *
1579      *
1580      * @param F base field
1581      * @param M number of rows
1582      * @param N number of columns
1583      * @param[in] A input matrix of dimension M x N
1584      * @param lda leading dimension of A
1585      * @param[out] X the output matrix
1586      * @param[out] R list of indices
1587      *
1588      * A is not modified
1589      * X is allocated during the computation.
1590      * \returns R
1591      */
1592     template <class Field>
1593     size_t ColRankProfileSubmatrix (const Field& F, const size_t M, const size_t N,
1594                                     typename Field::Element_ptr A, const size_t lda,
1595                                     typename Field::Element_ptr& X, size_t& R);
1596 
1597     /*********************************************/
1598     /* Accessors to Triangular and Echelon forms */
1599     /*********************************************/
1600 
1601     /** Extracts a triangular matrix from a compact storage A=L\U of rank R.
1602      * if OnlyNonZeroVectors is false, then T and A have the same dimensions
1603      * Otherwise, T is R x N if UpLo = FflasUpper, else T is  M x R
1604      * @param F: base field
1605      * @param UpLo: selects if the upper (FflasUpper) or lower (FflasLower) triangular matrix is returned
1606      * @param diag: selects if the triangular matrix unit-diagonal (FflasUnit/NoUnit)
1607      * @param M: row dimension of T
1608      * @param N: column dimension of T
1609      * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
1610      * @param[in] A: input matrix
1611      * @param lda: leading dimension of A
1612      * @param[out] T: output matrix
1613      * @param ldt: leading dimension of T
1614      * @param OnlyNonZeroVectors: decides whether the last zero rows/columns should be ignored
1615      */
1616     template <class Field>
1617     void
1618     getTriangular (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
1619                    const FFLAS::FFLAS_DIAG diag,
1620                    const size_t M, const size_t N, const size_t R,
1621                    typename Field::ConstElement_ptr A, const size_t lda,
1622                    typename Field::Element_ptr T, const size_t ldt,
1623                    const bool OnlyNonZeroVectors = false);
1624 
1625     /** Cleans up a compact storage A=L\U to reveal a triangular matrix of rank R.
1626      * @param F: base field
1627      * @param UpLo: selects if the upper (FflasUpper) or lower (FflasLower) triangular matrix is revealed
1628      * @param diag: selects if the triangular matrix unit-diagonal (FflasUnit/NoUnit)
1629      * @param M: row dimension of A
1630      * @param N: column dimension of A
1631      * @param R: rank of the triangular matrix
1632      * @param[inout] A: input/output matrix
1633      * @param lda: leading dimension of A
1634      */
1635     template <class Field>
1636     void
1637     getTriangular (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
1638                    const FFLAS::FFLAS_DIAG diag,
1639                    const size_t M, const size_t N, const size_t R,
1640                    typename Field::Element_ptr A, const size_t lda);
1641 
1642     /** Extracts a matrix in echelon form from a compact storage A=L\U of rank R obtained by
1643      * RowEchelonForm or ColumnEchelonForm.
1644      * Either L or U is in Echelon form (depending on Uplo)
1645      * The echelon structure is defined by the first R values of the array P.
1646      * row and column dimension of T are greater or equal to that of A
1647      * @param F: base field
1648      * @param UpLo: selects if the upper (FflasUpper) or lower (FflasLower) triangular matrix is returned
1649      * @param diag: selects if the echelon matrix has unit pivots (FflasUnit/NoUnit)
1650      * @param M: row dimension of T
1651      * @param N: column dimension of T
1652      * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
1653      * @param P: positions of the R pivots
1654      * @param[in] A: input matrix
1655      * @param lda: leading dimension of A
1656      * @param[out] T: output matrix
1657      * @param ldt: leading dimension of T
1658      * @param OnlyNonZeroVectors: decides whether the last zero rows/columns should be ignored
1659      * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
1660      */
1661     template <class Field>
1662     void
1663     getEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
1664                     const FFLAS::FFLAS_DIAG diag,
1665                     const size_t M, const size_t N, const size_t R, const size_t* P,
1666                     typename Field::ConstElement_ptr A, const size_t lda,
1667                     typename Field::Element_ptr T, const size_t ldt,
1668                     const bool OnlyNonZeroVectors = false,
1669                     const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
1670 
1671     /** Cleans up a compact storage A=L\U obtained by RowEchelonForm or ColumnEchelonForm
1672      * to reveal an echelon form of rank R.
1673      * Either L or U is in Echelon form (depending on Uplo)
1674      * The echelon structure is defined by the first R values of the array P.
1675      * @param F: base field
1676      * @param UpLo: selects if the upper (FflasUpper) or lower (FflasLower) triangular matrix is returned
1677      * @param diag: selects if the echelon matrix has unit pivots (FflasUnit/NoUnit)
1678      * @param M: row dimension of A
1679      * @param N: column dimension of A
1680      * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
1681      * @param P: positions of the R pivots
1682      * @param[inout] A: input/output matrix
1683      * @param lda: leading dimension of A
1684      * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
1685      */
1686     template <class Field>
1687     void
1688     getEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
1689                     const FFLAS::FFLAS_DIAG diag,
1690                     const size_t M, const size_t N, const size_t R, const size_t* P,
1691                     typename Field::Element_ptr A, const size_t lda,
1692                     const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
1693 
1694     /** Extracts a transformation matrix to echelon form from a compact storage A=L\U
1695      * of rank R obtained by RowEchelonForm or ColumnEchelonForm.
1696      * If Uplo == FflasLower:
1697      *   T is N x N (already allocated) such that A T = C is a transformation of A in
1698      *   Column echelon form
1699      * Else
1700      *   T is M x M (already allocated) such that T A = E is a transformation of A in
1701      *   Row Echelon form
1702      * @param F: base field
1703      * @param UpLo: Lower (FflasLower) means Transformation to Column Echelon Form, Upper (FflasUpper), to Row Echelon Form
1704      * @param diag: selects if the echelon matrix has unit pivots (FflasUnit/NoUnit)
1705      * @param M: row dimension of A
1706      * @param N: column dimension of A
1707      * @param R: rank of the triangular matrix
1708      * @param P: permutation matrix
1709      * @param[in] A: input matrix
1710      * @param lda: leading dimension of A
1711      * @param[out] T: output matrix
1712      * @param ldt: leading dimension of T
1713      * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
1714      */
1715     template <class Field>
1716     void
1717     getEchelonTransform (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
1718                          const FFLAS::FFLAS_DIAG diag,
1719                          const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
1720                          typename Field::ConstElement_ptr A, const size_t lda,
1721                          typename Field::Element_ptr T, const size_t ldt,
1722                          const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
1723     /** Extracts a matrix in echelon form from a compact storage A=L\U of rank R obtained by
1724      * ReducedRowEchelonForm or ReducedColumnEchelonForm with transform = true.
1725      * Either L or U is in Echelon form (depending on Uplo)
1726      * The echelon structure is defined by the first R values of the array P.
1727      * row and column dimension of T are greater or equal to that of A
1728      * @param F: base field
1729      * @param UpLo: selects if the upper (FflasUpper) or lower (FflasLower) triangular matrix is returned
1730      * @param diag: selects if the echelon matrix has unit pivots (FflasUnit/NoUnit)
1731      * @param M: row dimension of T
1732      * @param N: column dimension of T
1733      * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
1734      * @param P: positions of the R pivots
1735      * @param[in] A: input matrix
1736      * @param lda: leading dimension of A
1737      * @param ldt: leading dimension of T
1738      * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
1739      * @param OnlyNonZeroVectors: decides whether the last zero rows/columns should be ignored
1740      */
1741     template <class Field>
1742     void
1743     getReducedEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
1744                            const size_t M, const size_t N, const size_t R, const size_t* P,
1745                            typename Field::ConstElement_ptr A, const size_t lda,
1746                            typename Field::Element_ptr T, const size_t ldt,
1747                            const bool OnlyNonZeroVectors = false,
1748                            const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
1749 
1750     /** Cleans up a compact storage A=L\U of rank R obtained by ReducedRowEchelonForm or
1751      * ReducedColumnEchelonForm with transform = true.
1752      * Either L or U is in Echelon form (depending on Uplo)
1753      * The echelon structure is defined by the first R values of the array P.
1754      * @param F: base field
1755      * @param UpLo: selects if the upper (FflasUpper) or lower (FflasLower) triangular matrix is returned
1756      * @param diag: selects if the echelon matrix has unit pivots (FflasUnit/NoUnit)
1757      * @param M: row dimension of A
1758      * @param N: column dimension of A
1759      * @param R: rank of the triangular matrix (how many rows/columns need to be copied)
1760      * @param P: positions of the R pivots
1761      * @param[inout] A: input/output matrix
1762      * @param lda: leading dimension of A
1763      * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
1764      */
1765     template <class Field>
1766     void
1767     getReducedEchelonForm (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
1768                            const size_t M, const size_t N, const size_t R, const size_t* P,
1769                            typename Field::Element_ptr A, const size_t lda,
1770                            const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
1771 
1772     /** Extracts a transformation matrix to echelon form from a compact storage A=L\U
1773      * of rank R obtained by RowEchelonForm or ColumnEchelonForm.
1774      * If Uplo == FflasLower:
1775      *   T is N x N (already allocated) such that A T = C is a transformation of A in
1776      *   Column echelon form
1777      * Else
1778      *   T is M x M (already allocated) such that T A = E is a transformation of A in
1779      *   Row Echelon form
1780      * @param F: base field
1781      * @param UpLo: selects Col (FflasLower) or Row (FflasUpper) Echelon Form
1782      * @param diag: selects if the echelon matrix has unit pivots (FflasUnit/NoUnit)
1783      * @param M: row dimension of A
1784      * @param N: column dimension of A
1785      * @param R: rank of the triangular matrix
1786      * @param P: permutation matrix
1787      * @param[in] A: input matrix
1788      * @param lda: leading dimension of A
1789      * @param[out] T: output matrix
1790      * @param ldt: leading dimension of T
1791      * @param LuTag: which factorized form (CUP/PLE if FfpackSlabRecursive, PLUQ if FfpackTileRecursive)
1792      */
1793     template <class Field>
1794     void
1795     getReducedEchelonTransform (const Field& F, const FFLAS::FFLAS_UPLO Uplo,
1796                                 const size_t M, const size_t N, const size_t R, const size_t* P, const size_t* Q,
1797                                 typename Field::ConstElement_ptr A, const size_t lda,
1798                                 typename Field::Element_ptr T, const size_t ldt,
1799                                 const FFPACK_LU_TAG LuTag = FfpackSlabRecursive);
1800     /** Auxiliary routine: determines the permutation that changes a PLUQ decomposition
1801      * into a echelon form revealing PLUQ decomposition
1802      */
1803     void
1804     PLUQtoEchelonPermutation (const size_t N, const size_t R, const size_t * P, size_t * outPerm);
1805 
1806 } // FFPACK
1807 // #include "ffpack.inl"
1808 
1809 namespace FFPACK { /* not used */
1810 
1811     /** LQUPtoInverseOfFullRankMinor.
1812      * Suppose A has been factorized as L.Q.U.P, with rank r.
1813      * Then Qt.A.Pt has an invertible leading principal r x r submatrix
1814      * This procedure efficiently computes the inverse of this minor and puts it into X.
1815      * @note It changes the lower entries of A_factors in the process (NB: unless A was nonsingular and square)
1816      *
1817      * @param F base field
1818      * @param rank       rank of the matrix.
1819      * @param A_factors  matrix containing the L and U entries of the factorization
1820      * @param lda leading dimension of A
1821      * @param QtPointer  theLQUP->getQ()->getPointer() (note: getQ returns Qt!)
1822      * @param X          desired location for output
1823      * @param ldx leading dimension of X
1824      */
1825     template <class Field>
1826     typename Field::Element_ptr
1827     LQUPtoInverseOfFullRankMinor( const Field& F, const size_t rank,
1828                                   typename Field::Element_ptr A_factors, const size_t lda,
1829                                   const size_t* QtPointer,
1830                                   typename Field::Element_ptr X, const size_t ldx);
1831 
1832 } // FFPACK
1833 // include precompiled instantiation headers (avoiding to recompile them)
1834 #ifdef FFPACK_COMPILED
1835 #include "fflas-ffpack/interfaces/libs/ffpack_inst.h"
1836 #endif
1837 
1838 //---------------------------------------------------------------------
1839 // Checkers
1840 #include "fflas-ffpack/checkers/checkers_ffpack.h"
1841 //---------------------------------------------------------------------
1842 
1843 #include "ffpack_fgesv.inl"
1844 #include "ffpack_fgetrs.inl"
1845 //---------------------------------------------------------------------
1846 // Checkers
1847 #include "fflas-ffpack/checkers/checkers_ffpack.inl"
1848 //---------------------------------------------------------------------
1849 #include "ffpack_pluq.inl"
1850 #include "ffpack_pluq_mp.inl"
1851 #include "ffpack_ppluq.inl"
1852 #include "ffpack_ludivine.inl"
1853 #include "ffpack_ludivine_mp.inl"
1854 #include "ffpack_echelonforms.inl"
1855 #include "ffpack_fsytrf.inl"
1856 #include "ffpack_invert.inl"
1857 #include "ffpack_ftrtr.inl"
1858 #include "ffpack_ftrstr.inl"
1859 #include "ffpack_ftrssyr2k.inl"
1860 #include "ffpack_charpoly_kglu.inl"
1861 #include "ffpack_charpoly_kgfast.inl"
1862 #include "ffpack_charpoly_kgfastgeneralized.inl"
1863 #include "ffpack_charpoly_danilevski.inl"
1864 #include "ffpack_charpoly.inl"
1865 #include "ffpack_frobenius.inl"
1866 #include "ffpack_minpoly.inl"
1867 #include "ffpack_krylovelim.inl"
1868 #include "ffpack_permutation.inl"
1869 #include "ffpack_rankprofiles.inl"
1870 #include "ffpack_det_mp.inl"
1871 #include "ffpack.inl"
1872 
1873 #endif // __FFLASFFPACK_ffpack_H
1874 
1875 /* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
1876 // vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
1877 
1878