1/*
2 * Copyright (C) 2014 the FFLAS-FFPACK group
3 *
4 * Written by Clement Pernet <Clement.Pernet@imag.fr>
5 *            Brice Boyer (briceboyer) <boyer.brice@gmail.com>
6 *
7 *
8 * ========LICENCE========
9 * This file is part of the library FFLAS-FFPACK.
10 *
11 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
12 * it under the terms of the  GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
24 * ========LICENCE========
25 *.
26 */
27
28/** @file fflas/fflas_level3.h
29 * @brief  Matrix-Matrix operations
30 * or anything of \f$>n^2\f$ complexity.
31 */
32
33#ifndef __FFLASFFPACK_fflas_fflas_level3_INL
34#define __FFLASFFPACK_fflas_fflas_level3_INL
35
36//#include <givaro/zring.h>
37
38#include "fflas_bounds.inl"
39#include "fflas_helpers.inl"
40
41namespace FFLAS { namespace Protected {
42    //-----------------------------------------------------------------------------
43    // Some conversion functions
44    //-----------------------------------------------------------------------------
45
46
47    //---------------------------------------------------------------------
48    // Finite Field matrix => double matrix
49    // Special design for upper-triangular matrices
50    //---------------------------------------------------------------------
51    template<class Field>
52    void MatF2MatD_Triangular (const Field& F,
53                               Givaro::DoubleDomain::Element_ptr S, const size_t lds,
54                               typename Field::ConstElement_ptr const E,
55                               const size_t lde,
56                               const size_t m, const size_t n)
57    {
58
59        typename Field::ConstElement_ptr Ei = E;
60        Givaro::DoubleDomain::Element_ptr Si = S;
61        size_t i=0, j;
62        for ( ; i<m;++i, Ei+=lde, Si+=lds)
63            for ( j=i; j<n;++j)
64                F.convert(*(Si+j),*(Ei+j));
65    }
66
67    //---------------------------------------------------------------------
68    // Finite Field matrix => float matrix
69    // Special design for upper-triangular matrices
70    //---------------------------------------------------------------------
71    //! @todo do finit(...,FFLAS_TRANS,FFLAS_DIAG)
72    //! @todo do fconvert(...,FFLAS_TRANS,FFLAS_DIAG)
73    template<class Field>
74    void MatF2MatFl_Triangular (const Field& F,
75                                Givaro::FloatDomain::Element_ptr S, const size_t lds,
76                                typename Field::ConstElement_ptr const E,
77                                const size_t lde,
78                                const size_t m, const size_t n)
79    {
80
81        typename Field::ConstElement_ptr Ei = E;
82        Givaro::FloatDomain::Element_ptr Si = S;
83        size_t i=0, j;
84        for ( ; i<m;++i, Ei+=lde, Si+=lds)
85            for ( j=i; j<n;++j)
86                F.convert(*(Si+j),*(Ei+j));
87    }
88
89    /**
90     * Computes the maximal size for delaying the modular reduction
91     *         in a triangular system resolution.
92     *
93     *  Compute the maximal dimension k, such that a unit diagonal triangular
94     *  system of dimension k can be solved over Z without overflow of the
95     *  underlying floating point representation.
96     *
97     *  @bib
98     *  - Dumas, Giorgi, Pernet 06, arXiv:cs/0601133.
99     *
100     * \param F Finite Field/Ring of the computation
101     *
102     */
103    // Specialized routines for ftrsm
104    template <class Element> class ftrsmLeftUpperNoTransNonUnit;
105    template <class Element> class ftrsmLeftUpperNoTransUnit;
106    template <class Element> class ftrsmLeftUpperTransNonUnit;
107    template <class Element> class ftrsmLeftUpperTransUnit;
108    template <class Element> class ftrsmLeftLowerNoTransNonUnit;
109    template <class Element> class ftrsmLeftLowerNoTransUnit;
110    template <class Element> class ftrsmLeftLowerTransNonUnit;
111    template <class Element> class ftrsmLeftLowerTransUnit;
112    template <class Element> class ftrsmRightUpperNoTransNonUnit;
113    template <class Element> class ftrsmRightUpperNoTransUnit;
114    template <class Element> class ftrsmRightUpperTransNonUnit;
115    template <class Element> class ftrsmRightUpperTransUnit;
116    template <class Element> class ftrsmRightLowerNoTransNonUnit;
117    template <class Element> class ftrsmRightLowerNoTransUnit;
118    template <class Element> class ftrsmRightLowerTransNonUnit;
119    template <class Element> class ftrsmRightLowerTransUnit;
120
121    // Specialized routines for ftrmm
122    template <class Element> class ftrmmLeftUpperNoTransNonUnit;
123    template <class Element> class ftrmmLeftUpperNoTransUnit;
124    template <class Element> class ftrmmLeftUpperTransNonUnit;
125    template <class Element> class ftrmmLeftUpperTransUnit;
126    template <class Element> class ftrmmLeftLowerNoTransNonUnit;
127    template <class Element> class ftrmmLeftLowerNoTransUnit;
128    template <class Element> class ftrmmLeftLowerTransNonUnit;
129    template <class Element> class ftrmmLeftLowerTransUnit;
130    template <class Element> class ftrmmRightUpperNoTransNonUnit;
131    template <class Element> class ftrmmRightUpperNoTransUnit;
132    template <class Element> class ftrmmRightUpperTransNonUnit;
133    template <class Element> class ftrmmRightUpperTransUnit;
134    template <class Element> class ftrmmRightLowerNoTransNonUnit;
135    template <class Element> class ftrmmRightLowerNoTransUnit;
136    template <class Element> class ftrmmRightLowerTransNonUnit;
137    template <class Element> class ftrmmRightLowerTransUnit;
138
139} // protected
140} // FFLAS
141
142namespace FFLAS {
143
144    //---------------------------------------------------------------------
145    // Level 3 routines
146    //---------------------------------------------------------------------
147    // set by default for ftrsm to be thread safe
148    // undef it at your own risk, and only if you run it in sequential
149#define __FFLAS__TRSM_READONLY
150
151    /** @brief ftrsm: <b>TR</b>iangular <b>S</b>ystem solve with <b>M</b>atrix.
152     * Computes  \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ or  \f$B \gets \alpha B \mathrm{op}(A^{-1})\f$.
153     * \param F field
154     * \param Side if \c Side==FflasLeft then  \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ is computed.
155     * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
156     * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
157     * \param Diag if \c Diag==FflasUnit then \p A is unit.
158     * \param M rows of \p B
159     * \param N cols of \p B
160     * @param alpha scalar
161     * \param A triangular invertible matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$
162     * @param lda leading dim of \p A
163     * @param B matrix of size \p MxN
164     * @param ldb leading dim of \p B
165     * @bug \f$\alpha\f$ must be non zero.
166     */
167    template<class Field>
168    void
169    ftrsm (const Field& F, const FFLAS_SIDE Side,
170           const FFLAS_UPLO Uplo,
171           const FFLAS_TRANSPOSE TransA,
172           const FFLAS_DIAG Diag,
173           const size_t M, const size_t N,
174           const typename Field::Element alpha,
175#ifdef __FFLAS__TRSM_READONLY
176           typename Field::ConstElement_ptr A,
177#else
178           typename Field::Element_ptr A,
179#endif
180           const size_t lda,
181           typename Field::Element_ptr B, const size_t ldb);
182
183    /** @brief ftrmm: <b>TR</b>iangular <b>M</b>atrix <b>M</b>ultiply.
184     * Computes  \f$ B \gets \alpha \mathrm{op}(A) B\f$ or  \f$B \gets \alpha B \mathrm{op}(A)\f$.
185     * @param F field
186     * \param Side if \c Side==FflasLeft then  \f$ B \gets \alpha \mathrm{op}(A) B\f$ is computed.
187     * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
188     * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
189     * \param Diag if \c Diag==FflasUnit then \p A is implicitly unit.
190     * \param M rows of \p B
191     * \param N cols of \p B
192     * @param alpha scalar
193     * \param A triangular matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$
194     * @param lda leading dim of \p A
195     * @param B matrix of size \p MxN
196     * @param ldb leading dim of \p B
197     */
198    template<class Field>
199    void
200    ftrmm (const Field& F, const FFLAS_SIDE Side,
201           const FFLAS_UPLO Uplo,
202           const FFLAS_TRANSPOSE TransA,
203           const FFLAS_DIAG Diag,
204           const size_t M, const size_t N,
205           const typename Field::Element alpha,
206           typename Field::ConstElement_ptr A, const size_t lda,
207           typename Field::Element_ptr B, const size_t ldb);
208
209    /** @brief ftrmm: <b>TR</b>iangular <b>M</b>atrix <b>M</b>ultiply with 3 operands
210     * Computes  \f$ C \gets \alpha \mathrm{op}(A) B + beta C\f$ or  \f$C \gets \alpha B \mathrm{op}(A) + beta C\f$.
211     * @param F field
212     * \param Side if \c Side==FflasLeft then  \f$ B \gets \alpha \mathrm{op}(A) B\f$ is computed.
213     * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
214     * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
215     * \param Diag if \c Diag==FflasUnit then \p A is implicitly unit.
216     * \param M rows of \p B
217     * \param N cols of \p B
218     * @param alpha scalar
219     * \param A triangular matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$
220     * @param lda leading dim of \p A
221     * @param B matrix of size \p MxN
222     * @param ldb leading dim of \p B
223     * @param beta scalar
224     * @param C matrix of size \p MxN
225     * @param ldc leading dim of \p C
226     */
227    template<class Field>
228    void
229    ftrmm (const Field& F, const FFLAS_SIDE Side,
230           const FFLAS_UPLO Uplo,
231           const FFLAS_TRANSPOSE TransA,
232           const FFLAS_DIAG Diag,
233           const size_t M, const size_t N,
234           const typename Field::Element alpha,
235           typename Field::ConstElement_ptr A, const size_t lda,
236           typename Field::ConstElement_ptr B, const size_t ldb,
237           const typename Field::Element beta,
238           typename Field::Element_ptr C, const size_t ldc);
239
240    /** @brief  fsyrk: Symmetric Rank K update
241     *
242     * Computes the Lower or Upper triangular part of \f$C = \alpha A \times A^T + \beta C\f$ or \f$C = \alpha A^T \times A + \beta C\f$
243     * \param F field.
244     * \param UpLo whether to compute the upper or the lower triangular part of the symmetric matrix \p C
245     * \param trans if \c ta==FflasNoTrans then comput \f$C = \alpha A \times A^T + \beta C\f$, else  \f$C = \alpha A^T \times A + \beta C\f$
246     * \param n order of matrix \p C
247     * \param k see \p A
248     * \param alpha scalar
249     * \param A \f$A\f$ is \f$n \times k\f$ or \f$A\f$ is \f$k \times n\f$
250     * \param lda leading dimension of \p A
251     * \param beta scalar
252     * \param C \f$C\f$ is \f$n \times n\f$
253     * \param ldc leading dimension of \p C
254     * @warning \f$\alpha\f$ \e must be invertible
255     */
256    template<class Field>
257    typename Field::Element_ptr
258    fsyrk (const Field& F,
259           const FFLAS_UPLO UpLo,
260           const FFLAS_TRANSPOSE trans,
261           const size_t n,
262           const size_t k,
263           const typename Field::Element alpha,
264           typename Field::ConstElement_ptr A, const size_t lda,
265           const typename Field::Element beta,
266           typename Field::Element_ptr C, const size_t ldc);
267
268    template<class Field>
269    typename Field::Element_ptr
270    fsyr2k (const Field& F,
271            const FFLAS_UPLO UpLo,
272            const FFLAS_TRANSPOSE trans,
273            const size_t n,
274            const size_t k,
275            const typename Field::Element alpha,
276            typename Field::ConstElement_ptr A, const size_t lda,
277            typename Field::ConstElement_ptr B, const size_t ldb,
278            const typename Field::Element beta,
279            typename Field::Element_ptr C, const size_t ldc);
280
281    /** @brief  fsyrk: Symmetric Rank K update with diagonal scaling
282     *
283     * Computes the Lower or Upper triangular part of
284     * \f$C = \alpha A \times D \times A^T + \beta C\f$ or
285     * \f$C = \alpha A^T \times D \times A + \beta C\f$ where \p D is a diagonal matrix.
286     * Matrix \p A is updated into \f$ D\times A\f$ (if trans = FflasTrans) or
287     * \f$ A\times D\f$ (if trans = FflasNoTrans).
288     * \param F field.
289     * \param UpLo whether to compute the upper or the lower triangular part of the symmetric
290     *        matrix \p C
291     * \param trans if \c ta==FflasNoTrans then compute \f$C = \alpha A \times A^T + \beta C\f$,
292     *              else  \f$C = \alpha A^T \times A + \beta C\f$
293     * \param n order of matrix \p C
294     * \param k see \p A
295     * \param alpha scalar
296     * \param A \f$A\f$ is \f$n \times k\f$ or \f$A\f$ is \f$k \times n\f$
297     * \param lda leading dimension of \p A
298     * \param D \f$D\f$ is \f$k \times k\f$ diagonal matrix, stored as a vector of k coefficients
299     * \param lda leading dimension of \p A
300     * \param beta scalar
301     * \param C \f$C\f$ is \f$n \times n\f$
302     * \param ldc leading dimension of \p C
303     * @warning \f$\alpha\f$ \e must be invertible
304     */
305    template<class Field>
306    typename Field::Element_ptr
307    fsyrk (const Field& F,
308           const FFLAS_UPLO UpLo,
309           const FFLAS_TRANSPOSE trans,
310           const size_t n,
311           const size_t k,
312           const typename Field::Element alpha,
313           typename Field::Element_ptr A, const size_t lda,
314           typename Field::ConstElement_ptr D, const size_t incD,
315           const typename Field::Element beta,
316           typename Field::Element_ptr C, const size_t ldc, const size_t threshold=__FFLASFFPACK_FSYRK_THRESHOLD);
317    template<class Field>
318    typename Field::Element_ptr
319    fsyrk (const Field& F,
320           const FFLAS_UPLO UpLo,
321           const FFLAS_TRANSPOSE trans,
322           const size_t n,
323           const size_t k,
324           const typename Field::Element alpha,
325           typename Field::Element_ptr A, const size_t lda,
326           typename Field::ConstElement_ptr D, const size_t incD,
327           const typename Field::Element beta,
328           typename Field::Element_ptr C, const size_t ldc,
329           const ParSeqHelper::Sequential seq,
330           const size_t threshold=__FFLASFFPACK_FSYRK_THRESHOLD);
331    template<class Field, class Cut, class Param>
332    typename Field::Element_ptr
333    fsyrk (const Field& F,
334           const FFLAS_UPLO UpLo,
335           const FFLAS_TRANSPOSE trans,
336           const size_t n,
337           const size_t k,
338           const typename Field::Element alpha,
339           typename Field::Element_ptr A, const size_t lda,
340           typename Field::ConstElement_ptr D, const size_t incD,
341           const typename Field::Element beta,
342           typename Field::Element_ptr C, const size_t ldc,
343           const ParSeqHelper::Parallel<Cut,Param> par,
344           const size_t threshold=__FFLASFFPACK_FSYRK_THRESHOLD);
345    /** @brief  fsyrk: Symmetric Rank K update with diagonal scaling
346     *
347     * Computes the Lower or Upper triangular part of
348     * \f$C = \alpha A \times Delta D \times A^T + \beta C\f$ or
349     * \f$C = \alpha A^T \times Delta D \times A + \beta C\f$ where \p D is a diagonal matrix
350     * and \p Delta is a block diagonal with either 1 on the diagonal or 2x2 swap blocks
351     * Matrix \p A is updated into \f$ D\times A\f$ (if trans = FflasTrans) or
352     * \f$ A\times D\f$ (if trans = FflasNoTrans).
353     * \param F field.
354     * \param UpLo whether to compute the upper or the lower triangular part of the symmetric
355     *        matrix \p C
356     * \param trans if \c ta==FflasNoTrans then compute \f$C = \alpha A Delta D \times A^T + \beta C\f$,
357     *              else  \f$C = \alpha A^T Delta D \times A + \beta C\f$
358     * \param n see \p B
359     * \param k see \p A
360     * \param alpha scalar
361     * \param A \f$A\f$ is \f$n \times k\f$ or \f$A\f$ is \f$k \times n\f$
362     * \param lda leading dimension of \p A
363     * \param D \f$D\f$ is \f$k \times k\f$ diagonal matrix, stored as a vector of k coefficients
364     * \param twoBlocks a vector boolean indicating the beginning of each 2x2 blocs in Delta
365     * \param lda leading dimension of \p A
366     * \param beta scalar
367     * \param C \f$C\f$ is \f$n \times n\f$
368     * \param ldc leading dimension of \p C
369     * @warning \f$\alpha\f$ \e must be invertible
370     */
371    template<class Field>
372    typename Field::Element_ptr
373    fsyrk (const Field& F,
374           const FFLAS_UPLO UpLo,
375           const FFLAS_TRANSPOSE trans,
376           const size_t n,
377           const size_t k,
378           const typename Field::Element alpha,
379           typename Field::Element_ptr A, const size_t lda,
380           typename Field::ConstElement_ptr D, const size_t incD,
381           const std::vector<bool>& twoBlock,
382           const typename Field::Element beta,
383           typename Field::Element_ptr C, const size_t ldc, const size_t threshold=__FFLASFFPACK_FSYRK_THRESHOLD);
384
385    /** @brief  fsyr2k: Symmetric Rank 2K update
386     *
387     * Computes the Lower or Upper triangular part of \f$C = \alpha ( A \times B^T + B \times A^T) + \beta C\f$ or \f$C = \alpha ( A^T \times B + B^T \times A ) + \beta C\f$
388     * \param F field.
389     * \param UpLo whether to compute the upper or the lower triangular part of the symmetric matrix \p C
390     * \param trans if \c ta==FflasNoTrans then compute \f$C = \alpha ( A \times B^T + B \times A^T ) + \beta C\f$, else  \f$C = \alpha ( A^T \times B + B^T \times A) + \beta C\f$
391     * \param n order of matrix \p C
392     * \param k see \p A
393     * \param alpha scalar
394     * \param A \f$A\f$ is \f$n \times k\f$ (FflasNoTrans) or \f$A\f$ is \f$k \times n\f$ (FflasTrans)
395     * \param lda leading dimension of \p A
396     * \param beta scalar
397     * \param C \f$C\f$ is \f$n \times n\f$
398     * \param ldc leading dimension of \p C
399     * @warning \f$\alpha\f$ \e must be invertible
400     */
401    template<class Field>
402    typename Field::Element_ptr
403    fsyr2k (const Field& F,
404            const FFLAS_UPLO UpLo,
405            const FFLAS_TRANSPOSE trans,
406            const size_t n,
407            const size_t k,
408            const typename Field::Element alpha,
409            typename Field::ConstElement_ptr A, const size_t lda,
410            typename Field::ConstElement_ptr B, const size_t ldb,
411            const typename Field::Element beta,
412            typename Field::Element_ptr C, const size_t ldc);
413
414    /** @brief  fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply.
415     *
416     * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$
417     * Automatically set Winograd recursion level
418     * \param F field.
419     * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$,
420     * \param tb same for matrix \p B
421     * \param m see \p A
422     * \param n see \p B
423     * \param k see \p A
424     * \param alpha scalar
425     * \param beta scalar
426     * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$
427     * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$
428     * \param C \f$C\f$ is \f$m \times n\f$
429     * \param lda leading dimension of \p A
430     * \param ldb leading dimension of \p B
431     * \param ldc leading dimension of \p C
432     * \param w recursive levels of Winograd's algorithm are used. No argument (or -1) does auto computation of \p w.
433     * @warning \f$\alpha\f$ \e must be invertible
434     */
435    template<class Field>
436    typename Field::Element_ptr
437    fgemm( const Field& F,
438           const FFLAS_TRANSPOSE ta,
439           const FFLAS_TRANSPOSE tb,
440           const size_t m,
441           const size_t n,
442           const size_t k,
443           const typename Field::Element alpha,
444           typename Field::ConstElement_ptr A, const size_t lda,
445           typename Field::ConstElement_ptr B, const size_t ldb,
446           const typename Field::Element beta,
447           typename Field::Element_ptr C, const size_t ldc);
448
449    template<typename Field>
450    typename Field::Element_ptr
451    fgemm( const Field& F,
452           const FFLAS_TRANSPOSE ta,
453           const FFLAS_TRANSPOSE tb,
454           const size_t m,
455           const size_t n,
456           const size_t k,
457           const typename Field::Element alpha,
458           typename Field::ConstElement_ptr A, const size_t lda,
459           typename Field::ConstElement_ptr B, const size_t ldb,
460           const typename Field::Element beta,
461           typename Field::Element_ptr C, const size_t ldc,
462           const ParSeqHelper::Sequential seq);
463
464    template<typename Field, class Cut, class Param>
465    typename Field::Element_ptr
466    fgemm( const Field& F,
467           const FFLAS_TRANSPOSE ta,
468           const FFLAS_TRANSPOSE tb,
469           const size_t m,
470           const size_t n,
471           const size_t k,
472           const typename Field::Element alpha,
473           typename Field::ConstElement_ptr A, const size_t lda,
474           typename Field::ConstElement_ptr B, const size_t ldb,
475           const typename Field::Element beta,
476           typename Field::Element_ptr C, const size_t ldc,
477           const ParSeqHelper::Parallel<Cut,Param> par);
478
479    template<typename Field>
480    typename Field::Element_ptr
481    pfgemm (const Field& F,
482            const FFLAS_TRANSPOSE ta,
483            const FFLAS_TRANSPOSE tb,
484            const size_t m,
485            const size_t n,
486            const size_t k,
487            const typename Field::Element alpha,
488            typename Field::ConstElement_ptr A, const size_t lda,
489            typename Field::ConstElement_ptr B, const size_t ldb,
490            const typename Field::Element beta,
491            typename Field::Element_ptr C, const size_t ldc,
492            size_t numthreads = 0){
493        PAR_BLOCK{
494            size_t nt = numthreads ? numthreads : NUM_THREADS;
495            ParSeqHelper::Parallel<CuttingStrategy::Block,StrategyParameter::Threads> par(nt);
496            fgemm(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,par);
497        }
498    }
499
500    template<class Field>
501    typename Field::Element*
502    pfgemm_1D_rec( const Field& F,
503                   const FFLAS_TRANSPOSE ta,
504                   const FFLAS_TRANSPOSE tb,
505                   const size_t m,
506                   const size_t n,
507                   const size_t k,
508                   const typename Field::Element alpha,
509                   const typename Field::Element_ptr A, const size_t lda,
510                   const typename Field::Element_ptr B, const size_t ldb,
511                   const typename Field::Element beta,
512                   typename Field::Element * C, const size_t ldc, size_t seuil);
513
514    template<class Field>
515    typename Field::Element*
516    pfgemm_2D_rec( const Field& F,
517                   const FFLAS_TRANSPOSE ta,
518                   const FFLAS_TRANSPOSE tb,
519                   const size_t m,
520                   const size_t n,
521                   const size_t k,
522                   const typename Field::Element alpha,
523                   const typename Field::Element_ptr A, const size_t lda,
524                   const typename Field::Element_ptr B, const size_t ldb,
525                   const typename Field::Element beta,
526                   typename Field::Element * C, const size_t ldc, size_t seuil);
527
528    template<class Field>
529    typename Field::Element*
530    pfgemm_3D_rec( const Field& F,
531                   const FFLAS_TRANSPOSE ta,
532                   const FFLAS_TRANSPOSE tb,
533                   const size_t m,
534                   const size_t n,
535                   const size_t k,
536                   const typename Field::Element alpha,
537                   const typename Field::Element_ptr A, const size_t lda,
538                   const typename Field::Element_ptr B, const size_t ldb,
539                   const typename Field::Element beta,
540                   typename Field::Element_ptr C, const size_t ldc, size_t seuil, size_t * x);
541
542    template<class Field>
543    typename Field::Element_ptr
544    pfgemm_3D_rec2( const Field& F,
545                    const FFLAS_TRANSPOSE ta,
546                    const FFLAS_TRANSPOSE tb,
547                    const size_t m,
548                    const size_t n,
549                    const size_t k,
550                    const typename Field::Element alpha,
551                    const typename Field::Element_ptr A, const size_t lda,
552                    const typename Field::Element_ptr B, const size_t ldb,
553                    const typename Field::Element beta,
554                    typename Field::Element_ptr C, const size_t ldc, size_t seuil, size_t *x);
555
556    /** @brief  fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply.
557     *
558     * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$
559     * Version with Helper. Input and Output are not supposed to be reduced.
560     * \param F field.
561     * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$,
562     * \param tb same for matrix \p B
563     * \param m see \p A
564     * \param n see \p B
565     * \param k see \p A
566     * \param alpha scalar
567     * \param beta scalar
568     * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$
569     * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$
570     * \param C \f$C\f$ is \f$m \times n\f$
571     * \param lda leading dimension of \p A
572     * \param ldb leading dimension of \p B
573     * \param ldc leading dimension of \p C
574     * \param H helper, driving the computation (algorithm, delayed modular reduction, switch of base type, etc)
575     * @warning \f$\alpha\f$ \e must be invertible
576     */
577    // template<class Field, class AlgoT, class FieldTrait, class ParSeqTrait>
578    // inline  typename Field::Element_ptr
579    // fgemm (const Field& F,
580    //        const FFLAS_TRANSPOSE ta,
581    //        const FFLAS_TRANSPOSE tb,
582    //        const size_t m, const size_t n, const size_t k,
583    //        const typename Field::Element alpha,
584    //        typename Field::Element_ptr A, const size_t lda,
585    //        typename Field::Element_ptr B, const size_t ldb,
586    //        const typename Field::Element beta,
587    //        typename Field::Element_ptr C, const size_t ldc,
588    //        MMHelper<Field, AlgoT, FieldTrait, ParSeqTrait> & H);
589
590} // FFLAS
591
592#include "fflas-ffpack/paladin/parallel.h"
593
594namespace FFLAS {
595
596    /** @brief fsquare: Squares a matrix.
597     * compute \f$ C \gets \alpha \mathrm{op}(A) \mathrm{op}(A) + \beta C\f$ over a Field \p F
598     * Avoid the conversion of B
599     * @param ta  if \c ta==FflasTrans, \f$\mathrm{op}(A)=A^T\f$.
600     * @param F field
601     * @param n size of \p A
602     * @param alpha scalar
603     * @param beta scalar
604     * @param A dense matrix of size \c nxn
605     * @param lda leading dimension of \p A
606     * @param C dense matrix of size \c nxn
607     * @param ldc leading dimension of \p C
608     */
609    template<class Field>
610    typename Field::Element_ptr fsquare (const Field& F,
611                                         const FFLAS_TRANSPOSE ta,
612                                         const size_t n,
613                                         const typename Field::Element alpha,
614                                         typename Field::ConstElement_ptr A,
615                                         const size_t lda,
616                                         const typename Field::Element beta,
617                                         typename Field::Element_ptr C,
618                                         const size_t ldc);
619
620
621} // FFLAS
622
623#endif // __FFLASFFPACK_fflas_fflas_level3_INL
624/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
625// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
626