1/* 2 * Copyright (C) 2014 the FFLAS-FFPACK group 3 * 4 * Written by Clement Pernet <Clement.Pernet@imag.fr> 5 * Brice Boyer (briceboyer) <boyer.brice@gmail.com> 6 * 7 * 8 * ========LICENCE======== 9 * This file is part of the library FFLAS-FFPACK. 10 * 11 * FFLAS-FFPACK is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public 13 * License as published by the Free Software Foundation; either 14 * version 2.1 of the License, or (at your option) any later version. 15 * 16 * This library is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Lesser General Public License for more details. 20 * 21 * You should have received a copy of the GNU Lesser General Public 22 * License along with this library; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 24 * ========LICENCE======== 25 *. 26 */ 27 28 29namespace FFLAS { 30 //--------------------------------------------------------------------- 31 // Level 3 routines 32 //--------------------------------------------------------------------- 33 // set by default for ftrsm to be thread safe 34 // undef it at your own risk, and only if you run it in sequential 35#define __FFLAS__TRSM_READONLY 36 37 /** @brief ftrsm: <b>TR</b>iangular <b>S</b>ystem solve with <b>M</b>atrix. 38 * Computes \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ or \f$B \gets \alpha B \mathrm{op}(A^{-1})\f$. 39 * \param F field 40 * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ is computed. 41 * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular 42 * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$. 43 * \param Diag if \c Diag==FflasUnit then \p A is unit. 44 * \param M rows of \p B 45 * \param N cols of \p B 46 * @param alpha scalar 47 * \param A triangular invertible matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$ 48 * @param lda leading dim of \p A 49 * @param B matrix of size \p MxN 50 * @param ldb leading dim of \p B 51 * @bug \f$\alpha\f$ must be non zero. 52 */ 53 template INST_OR_DECL 54 void 55 ftrsm (const FFLAS_FIELD <FFLAS_ELT>& F, const FFLAS_SIDE Side, 56 const FFLAS_UPLO Uplo, 57 const FFLAS_TRANSPOSE TransA, 58 const FFLAS_DIAG Diag, 59 const size_t M, const size_t N, 60 const FFLAS_ELT alpha, 61#ifdef __FFLAS__TRSM_READONLY 62 const FFLAS_ELT* A, 63#else 64 FFLAS_ELT* A, 65#endif 66 const size_t lda, 67 FFLAS_ELT* B, const size_t ldb); 68 69 /** @brief ftrmm: <b>TR</b>iangular <b>M</b>atrix <b>M</b>ultiply. 70 * Computes \f$ B \gets \alpha \mathrm{op}(A) B\f$ or \f$B \gets \alpha B \mathrm{op}(A)\f$. 71 * @param F field 72 * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A) B\f$ is computed. 73 * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular 74 * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$. 75 * \param Diag if \c Diag==FflasUnit then \p A is implicitly unit. 76 * \param M rows of \p B 77 * \param N cols of \p B 78 * @param alpha scalar 79 * \param A triangular matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$ 80 * @param lda leading dim of \p A 81 * @param B matrix of size \p MxN 82 * @param ldb leading dim of \p B 83 */ 84 template INST_OR_DECL 85 void 86 ftrmm (const FFLAS_FIELD <FFLAS_ELT>& F, const FFLAS_SIDE Side, 87 const FFLAS_UPLO Uplo, 88 const FFLAS_TRANSPOSE TransA, 89 const FFLAS_DIAG Diag, 90 const size_t M, const size_t N, 91 const FFLAS_ELT alpha, 92 const FFLAS_ELT* A, const size_t lda, 93 FFLAS_ELT* B, const size_t ldb); 94 95 /** @brief fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply. 96 * 97 * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$ 98 * Automatically set Winograd recursion level 99 * \param F field. 100 * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$, 101 * \param tb same for matrix \p B 102 * \param m see \p A 103 * \param n see \p B 104 * \param k see \p A 105 * \param alpha scalar 106 * \param beta scalar 107 * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$ 108 * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$ 109 * \param C \f$C\f$ is \f$m \times n\f$ 110 * \param lda leading dimension of \p A 111 * \param ldb leading dimension of \p B 112 * \param ldc leading dimension of \p C 113 * \param w recursive levels of Winograd's algorithm are used. No argument (or -1) does auto computation of \p w. 114 * @warning \f$\alpha\f$ \e must be invertible 115 */ 116 template INST_OR_DECL 117 FFLAS_ELT* fgemm( const FFLAS_FIELD <FFLAS_ELT>& F, 118 const FFLAS_TRANSPOSE ta, 119 const FFLAS_TRANSPOSE tb, 120 const size_t m, const size_t n, const size_t k, 121 const FFLAS_ELT alpha, 122 const FFLAS_ELT* A, const size_t lda, 123 const FFLAS_ELT* B, const size_t ldb, 124 const FFLAS_ELT beta, 125 FFLAS_ELT* C, const size_t ldc); 126 127 template INST_OR_DECL 128 FFLAS_ELT* 129 fgemm( const FFLAS_FIELD <FFLAS_ELT>& F, 130 const FFLAS_TRANSPOSE ta, 131 const FFLAS_TRANSPOSE tb, 132 const size_t m, 133 const size_t n, 134 const size_t k, 135 const FFLAS_ELT alpha, 136 const FFLAS_ELT* A, const size_t lda, 137 const FFLAS_ELT* B, const size_t ldb, 138 const FFLAS_ELT beta, 139 FFLAS_ELT* C, const size_t ldc, 140 const ParSeqHelper::Sequential seq); 141 142 template INST_OR_DECL 143 FFLAS_ELT* 144 fgemm( const FFLAS_FIELD <FFLAS_ELT>& F, 145 const FFLAS_TRANSPOSE ta, 146 const FFLAS_TRANSPOSE tb, 147 const size_t m, 148 const size_t n, 149 const size_t k, 150 const FFLAS_ELT alpha, 151 const FFLAS_ELT* A, const size_t lda, 152 const FFLAS_ELT* B, const size_t ldb, 153 const FFLAS_ELT beta, 154 FFLAS_ELT* C, const size_t ldc, 155 const ParSeqHelper::Parallel<CuttingStrategy::Recursive,StrategyParameter::TwoDAdaptive> par); 156 157 template INST_OR_DECL 158 FFLAS_ELT* 159 fgemm( const FFLAS_FIELD <FFLAS_ELT>& F, 160 const FFLAS_TRANSPOSE ta, 161 const FFLAS_TRANSPOSE tb, 162 const size_t m, 163 const size_t n, 164 const size_t k, 165 const FFLAS_ELT alpha, 166 const FFLAS_ELT* A, const size_t lda, 167 const FFLAS_ELT* B, const size_t ldb, 168 const FFLAS_ELT beta, 169 FFLAS_ELT* C, const size_t ldc, 170 const ParSeqHelper::Parallel<CuttingStrategy::Block,StrategyParameter::Threads> par); 171 172 173 /** @brief fsquare: Squares a matrix. 174 * compute \f$ C \gets \alpha \mathrm{op}(A) \mathrm{op}(A) + \beta C\f$ over a FFLAS_FIELD <FFLAS_ELT> \p F 175 * Avoid the conversion of B 176 * @param ta if \c ta==FflasTrans, \f$\mathrm{op}(A)=A^T\f$. 177 * @param F field 178 * @param n size of \p A 179 * @param alpha scalar 180 * @param beta scalar 181 * @param A dense matrix of size \c nxn 182 * @param lda leading dimension of \p A 183 * @param C dense matrix of size \c nxn 184 * @param ldc leading dimension of \p C 185 */ 186 template INST_OR_DECL 187 FFLAS_ELT* fsquare (const FFLAS_FIELD <FFLAS_ELT>& F, 188 const FFLAS_TRANSPOSE ta, 189 const size_t n, 190 const FFLAS_ELT alpha, 191 const FFLAS_ELT* A, const size_t lda, 192 const FFLAS_ELT beta, 193 FFLAS_ELT* C, const size_t ldc); 194 195 196} // FFLAS 197 198/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 199// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s 200