1/*
2 * Copyright (C) 2014 the FFLAS-FFPACK group
3 *
4 * Written by Clement Pernet <Clement.Pernet@imag.fr>
5 *            Brice Boyer (briceboyer) <boyer.brice@gmail.com>
6 *
7 *
8 * ========LICENCE========
9 * This file is part of the library FFLAS-FFPACK.
10 *
11 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
12 * it under the terms of the  GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2.1 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
24 * ========LICENCE========
25 *.
26 */
27
28
29namespace FFLAS {
30    //---------------------------------------------------------------------
31    // Level 3 routines
32    //---------------------------------------------------------------------
33    // Defined by default so that ftrsm is thread safe: while this macro is defined,
34    // ftrsm is declared with a pointer-to-const A. Undefine it at your own risk, and only if ftrsm is run sequentially.
35#define __FFLAS__TRSM_READONLY
36
37    /** @brief ftrsm: <b>TR</b>iangular <b>S</b>ystem solve with <b>M</b>atrix.
38     * Computes  \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ or  \f$B \gets \alpha B \mathrm{op}(A^{-1})\f$.
39     * \param F field
40     * \param Side if \c Side==FflasLeft then  \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ is computed, otherwise \f$B \gets \alpha B \mathrm{op}(A^{-1})\f$.
41     * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
42     * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
43     * \param Diag if \c Diag==FflasUnit then \p A is unit.
44     * \param M rows of \p B
45     * \param N cols of \p B
46     * \param alpha scalar
47     * \param A triangular invertible matrix. If \c Side==FflasLeft then \p A is \f$M\times M\f$ (it is applied to the \f$M\times N\f$ matrix \p B from the left), otherwise \p A is \f$N\times N\f$. Declared as pointer-to-const only while \c __FFLAS__TRSM_READONLY is defined.
48     * \param lda leading dim of \p A
49     * \param B matrix of size \p MxN, overwritten with the result
50     * \param ldb leading dim of \p B
51     * @bug \f$\alpha\f$ must be non zero.
52     */
53    template INST_OR_DECL
54    void
55    ftrsm (const FFLAS_FIELD <FFLAS_ELT>& F, const FFLAS_SIDE Side,
56           const FFLAS_UPLO Uplo,
57           const FFLAS_TRANSPOSE TransA,
58           const FFLAS_DIAG Diag,
59           const size_t M, const size_t N,
60           const FFLAS_ELT alpha,
61#ifdef __FFLAS__TRSM_READONLY
62           const FFLAS_ELT* A,
63#else
64           FFLAS_ELT* A,
65#endif
66           const size_t lda,
67           FFLAS_ELT* B, const size_t ldb);
68
69    /** @brief ftrmm: <b>TR</b>iangular <b>M</b>atrix <b>M</b>ultiply.
70     * Computes  \f$ B \gets \alpha \mathrm{op}(A) B\f$ or  \f$B \gets \alpha B \mathrm{op}(A)\f$.
71     * \param F field
72     * \param Side if \c Side==FflasLeft then  \f$ B \gets \alpha \mathrm{op}(A) B\f$ is computed, otherwise \f$B \gets \alpha B \mathrm{op}(A)\f$.
73     * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular
74     * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$.
75     * \param Diag if \c Diag==FflasUnit then \p A is implicitly unit.
76     * \param M rows of \p B
77     * \param N cols of \p B
78     * \param alpha scalar
79     * \param A triangular matrix. If \c Side==FflasLeft then \p A is \f$M\times M\f$ (it multiplies the \f$M\times N\f$ matrix \p B from the left), otherwise \p A is \f$N\times N\f$
80     * \param lda leading dim of \p A
81     * \param B matrix of size \p MxN, overwritten with the result
82     * \param ldb leading dim of \p B
83     */
84    template INST_OR_DECL
85    void
86    ftrmm (const FFLAS_FIELD <FFLAS_ELT>& F, const FFLAS_SIDE Side,
87           const FFLAS_UPLO Uplo,
88           const FFLAS_TRANSPOSE TransA,
89           const FFLAS_DIAG Diag,
90           const size_t M, const size_t N,
91           const FFLAS_ELT alpha,
92           const FFLAS_ELT* A, const size_t lda,
93           FFLAS_ELT* B, const size_t ldb);
94
95    /** @brief  fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply.
96     *
97     * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$
98     * Automatically set Winograd recursion level
99     * \param F field.
100     * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$,
101     * \param tb same for matrix \p B
102     * \param m see \p A
103     * \param n see \p B
104     * \param k see \p A
105     * \param alpha scalar
106     * \param beta scalar
107     * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$
108     * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$
109     * \param C \f$C\f$ is \f$m \times n\f$
110     * \param lda leading dimension of \p A
111     * \param ldb leading dimension of \p B
112     * \param ldc leading dimension of \p C
113     * \note This declaration takes no \p w argument: the number of recursive levels of Winograd's algorithm is set automatically.
114     * @warning \f$\alpha\f$ \e must be invertible
     * \return a \c FFLAS_ELT* (NOTE(review): presumably the pointer \p C — confirm against the definition).
115     */
116    template INST_OR_DECL
117    FFLAS_ELT* fgemm( const FFLAS_FIELD <FFLAS_ELT>& F,
118                      const FFLAS_TRANSPOSE ta,
119                      const FFLAS_TRANSPOSE tb,
120                      const size_t m, const size_t n, const size_t k,
121                      const FFLAS_ELT alpha,
122                      const FFLAS_ELT* A, const size_t lda,
123                      const FFLAS_ELT* B, const size_t ldb,
124                      const FFLAS_ELT beta,
125                      FFLAS_ELT* C, const size_t ldc);
126
/** @brief fgemm overload with a trailing \c ParSeqHelper::Sequential argument.
 *
 * The parameters \p F, \p ta, \p tb, \p m, \p n, \p k, \p alpha, \p A, \p lda,
 * \p B, \p ldb, \p beta, \p C and \p ldc are declared with the same types and in
 * the same order as in the fgemm declaration that takes no helper argument.
 * \param seq helper object of type \c ParSeqHelper::Sequential, taken by value.
 * \note NOTE(review): presumably \p seq requests a purely sequential product — confirm against the definition.
 */
127    template INST_OR_DECL
128    FFLAS_ELT*
129    fgemm( const FFLAS_FIELD <FFLAS_ELT>& F,
130           const FFLAS_TRANSPOSE ta,
131           const FFLAS_TRANSPOSE tb,
132           const size_t m,
133           const size_t n,
134           const size_t k,
135           const FFLAS_ELT alpha,
136           const FFLAS_ELT* A, const size_t lda,
137           const FFLAS_ELT* B, const size_t ldb,
138           const FFLAS_ELT beta,
139           FFLAS_ELT* C, const size_t ldc,
140           const ParSeqHelper::Sequential seq);
141
/** @brief fgemm overload with a trailing
 * \c ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoDAdaptive> argument.
 *
 * The parameters \p F, \p ta, \p tb, \p m, \p n, \p k, \p alpha, \p A, \p lda,
 * \p B, \p ldb, \p beta, \p C and \p ldc are declared with the same types and in
 * the same order as in the fgemm declaration that takes no helper argument.
 * \param par helper object, taken by value; its template arguments are
 *        \c CuttingStrategy::Recursive and \c StrategyParameter::TwoDAdaptive.
 * \note NOTE(review): presumably \p par selects a parallel product using a recursive,
 *       two-dimensional adaptive splitting — confirm against the definition.
 */
142    template INST_OR_DECL
143    FFLAS_ELT*
144    fgemm( const FFLAS_FIELD <FFLAS_ELT>& F,
145           const FFLAS_TRANSPOSE ta,
146           const FFLAS_TRANSPOSE tb,
147           const size_t m,
148           const size_t n,
149           const size_t k,
150           const FFLAS_ELT alpha,
151           const FFLAS_ELT* A, const size_t lda,
152           const FFLAS_ELT* B, const size_t ldb,
153           const FFLAS_ELT beta,
154           FFLAS_ELT* C, const size_t ldc,
155           const ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoDAdaptive> par);
156
/** @brief fgemm overload with a trailing
 * \c ParSeqHelper::Parallel<CuttingStrategy::Block,StrategyParameter::Threads> argument.
 *
 * The parameters \p F, \p ta, \p tb, \p m, \p n, \p k, \p alpha, \p A, \p lda,
 * \p B, \p ldb, \p beta, \p C and \p ldc are declared with the same types and in
 * the same order as in the fgemm declaration that takes no helper argument.
 * \param par helper object, taken by value; its template arguments are
 *        \c CuttingStrategy::Block and \c StrategyParameter::Threads.
 * \note NOTE(review): presumably \p par selects a parallel product split into blocks
 *       according to a number of threads — confirm against the definition.
 */
157    template INST_OR_DECL
158    FFLAS_ELT*
159    fgemm( const FFLAS_FIELD <FFLAS_ELT>& F,
160           const FFLAS_TRANSPOSE ta,
161           const FFLAS_TRANSPOSE tb,
162           const size_t m,
163           const size_t n,
164           const size_t k,
165           const FFLAS_ELT alpha,
166           const FFLAS_ELT* A, const size_t lda,
167           const FFLAS_ELT* B, const size_t ldb,
168           const FFLAS_ELT beta,
169           FFLAS_ELT* C, const size_t ldc,
170           const ParSeqHelper::Parallel<CuttingStrategy::Block,StrategyParameter::Threads> par);
171
172
173    /** @brief fsquare: Squares a matrix.
174     * compute \f$ C \gets \alpha \mathrm{op}(A) \mathrm{op}(A) + \beta C\f$ over a FFLAS_FIELD <FFLAS_ELT> \p F
175     * Avoid the conversion of B (NOTE(review): there is no \p B argument here; presumably this refers to the second operand of an equivalent fgemm call with \p B = \p A — confirm).
176     * @param F field
177     * @param ta  if \c ta==FflasTrans, \f$\mathrm{op}(A)=A^T\f$.
178     * @param n size of \p A
179     * @param alpha scalar
180     * @param A dense matrix of size \c nxn
181     * @param lda leading dimension of \p A
182     * @param beta scalar
183     * @param C dense matrix of size \c nxn
184     * @param ldc leading dimension of \p C
     * @return a \c FFLAS_ELT* (NOTE(review): presumably the pointer \p C — confirm against the definition).
185     */
186    template INST_OR_DECL
187    FFLAS_ELT* fsquare (const FFLAS_FIELD <FFLAS_ELT>& F,
188                        const FFLAS_TRANSPOSE ta,
189                        const size_t n,
190                        const FFLAS_ELT alpha,
191                        const FFLAS_ELT* A, const size_t lda,
192                        const FFLAS_ELT beta,
193                        FFLAS_ELT* C, const size_t ldc);
194
195
196} // FFLAS
197
198/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
199// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
200