1/* 2 * Copyright (C) 2014 the FFLAS-FFPACK group 3 * 4 * Written by Clement Pernet <Clement.Pernet@imag.fr> 5 * Brice Boyer (briceboyer) <boyer.brice@gmail.com> 6 * 7 * 8 * ========LICENCE======== 9 * This file is part of the library FFLAS-FFPACK. 10 * 11 * FFLAS-FFPACK is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public 13 * License as published by the Free Software Foundation; either 14 * version 2.1 of the License, or (at your option) any later version. 15 * 16 * This library is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Lesser General Public License for more details. 20 * 21 * You should have received a copy of the GNU Lesser General Public 22 * License along with this library; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 24 * ========LICENCE======== 25 *. 26 */ 27 28/** @file fflas/fflas_level3.h 29 * @brief Matrix-Matrix operations 30 * or anything of \f$>n^2\f$ complexity. 31 */ 32 33#ifndef __FFLASFFPACK_fflas_fflas_level3_INL 34#define __FFLASFFPACK_fflas_fflas_level3_INL 35 36//#include <givaro/zring.h> 37 38#include "fflas_bounds.inl" 39#include "fflas_helpers.inl" 40 41namespace FFLAS { namespace Protected { 42 //----------------------------------------------------------------------------- 43 // Some conversion functions 44 //----------------------------------------------------------------------------- 45 46 47 //--------------------------------------------------------------------- 48 // Finite Field matrix => double matrix 49 // Special design for upper-triangular matrices 50 //--------------------------------------------------------------------- 51 template<class Field> 52 void MatF2MatD_Triangular (const Field& F, 53 Givaro::DoubleDomain::Element_ptr S, const size_t lds, 54 typename Field::ConstElement_ptr const E, 55 const size_t lde, 56 const size_t m, const size_t n) 57 { 58 59 typename Field::ConstElement_ptr Ei = E; 60 Givaro::DoubleDomain::Element_ptr Si = S; 61 size_t i=0, j; 62 for ( ; i<m;++i, Ei+=lde, Si+=lds) 63 for ( j=i; j<n;++j) 64 F.convert(*(Si+j),*(Ei+j)); 65 } 66 67 //--------------------------------------------------------------------- 68 // Finite Field matrix => float matrix 69 // Special design for upper-triangular matrices 70 //--------------------------------------------------------------------- 71 //! @todo do finit(...,FFLAS_TRANS,FFLAS_DIAG) 72 //! @todo do fconvert(...,FFLAS_TRANS,FFLAS_DIAG) 73 template<class Field> 74 void MatF2MatFl_Triangular (const Field& F, 75 Givaro::FloatDomain::Element_ptr S, const size_t lds, 76 typename Field::ConstElement_ptr const E, 77 const size_t lde, 78 const size_t m, const size_t n) 79 { 80 81 typename Field::ConstElement_ptr Ei = E; 82 Givaro::FloatDomain::Element_ptr Si = S; 83 size_t i=0, j; 84 for ( ; i<m;++i, Ei+=lde, Si+=lds) 85 for ( j=i; j<n;++j) 86 F.convert(*(Si+j),*(Ei+j)); 87 } 88 89 /** 90 * Computes the maximal size for delaying the modular reduction 91 * in a triangular system resolution. 92 * 93 * Compute the maximal dimension k, such that a unit diagonal triangular 94 * system of dimension k can be solved over Z without overflow of the 95 * underlying floating point representation. 96 * 97 * @bib 98 * - Dumas, Giorgi, Pernet 06, arXiv:cs/0601133. 99 * 100 * \param F Finite Field/Ring of the computation 101 * 102 */ 103 // Specialized routines for ftrsm 104 template <class Element> class ftrsmLeftUpperNoTransNonUnit; 105 template <class Element> class ftrsmLeftUpperNoTransUnit; 106 template <class Element> class ftrsmLeftUpperTransNonUnit; 107 template <class Element> class ftrsmLeftUpperTransUnit; 108 template <class Element> class ftrsmLeftLowerNoTransNonUnit; 109 template <class Element> class ftrsmLeftLowerNoTransUnit; 110 template <class Element> class ftrsmLeftLowerTransNonUnit; 111 template <class Element> class ftrsmLeftLowerTransUnit; 112 template <class Element> class ftrsmRightUpperNoTransNonUnit; 113 template <class Element> class ftrsmRightUpperNoTransUnit; 114 template <class Element> class ftrsmRightUpperTransNonUnit; 115 template <class Element> class ftrsmRightUpperTransUnit; 116 template <class Element> class ftrsmRightLowerNoTransNonUnit; 117 template <class Element> class ftrsmRightLowerNoTransUnit; 118 template <class Element> class ftrsmRightLowerTransNonUnit; 119 template <class Element> class ftrsmRightLowerTransUnit; 120 121 // Specialized routines for ftrmm 122 template <class Element> class ftrmmLeftUpperNoTransNonUnit; 123 template <class Element> class ftrmmLeftUpperNoTransUnit; 124 template <class Element> class ftrmmLeftUpperTransNonUnit; 125 template <class Element> class ftrmmLeftUpperTransUnit; 126 template <class Element> class ftrmmLeftLowerNoTransNonUnit; 127 template <class Element> class ftrmmLeftLowerNoTransUnit; 128 template <class Element> class ftrmmLeftLowerTransNonUnit; 129 template <class Element> class ftrmmLeftLowerTransUnit; 130 template <class Element> class ftrmmRightUpperNoTransNonUnit; 131 template <class Element> class ftrmmRightUpperNoTransUnit; 132 template <class Element> class ftrmmRightUpperTransNonUnit; 133 template <class Element> class ftrmmRightUpperTransUnit; 134 template <class Element> class ftrmmRightLowerNoTransNonUnit; 135 template <class Element> class ftrmmRightLowerNoTransUnit; 136 template <class Element> class ftrmmRightLowerTransNonUnit; 137 template <class Element> class ftrmmRightLowerTransUnit; 138 139} // protected 140} // FFLAS 141 142namespace FFLAS { 143 144 //--------------------------------------------------------------------- 145 // Level 3 routines 146 //--------------------------------------------------------------------- 147 // set by default for ftrsm to be thread safe 148 // undef it at your own risk, and only if you run it in sequential 149#define __FFLAS__TRSM_READONLY 150 151 /** @brief ftrsm: <b>TR</b>iangular <b>S</b>ystem solve with <b>M</b>atrix. 152 * Computes \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ or \f$B \gets \alpha B \mathrm{op}(A^{-1})\f$. 153 * \param F field 154 * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A^{-1}) B\f$ is computed. 155 * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular 156 * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$. 157 * \param Diag if \c Diag==FflasUnit then \p A is unit. 158 * \param M rows of \p B 159 * \param N cols of \p B 160 * @param alpha scalar 161 * \param A triangular invertible matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$ 162 * @param lda leading dim of \p A 163 * @param B matrix of size \p MxN 164 * @param ldb leading dim of \p B 165 * @bug \f$\alpha\f$ must be non zero. 166 */ 167 template<class Field> 168 void 169 ftrsm (const Field& F, const FFLAS_SIDE Side, 170 const FFLAS_UPLO Uplo, 171 const FFLAS_TRANSPOSE TransA, 172 const FFLAS_DIAG Diag, 173 const size_t M, const size_t N, 174 const typename Field::Element alpha, 175#ifdef __FFLAS__TRSM_READONLY 176 typename Field::ConstElement_ptr A, 177#else 178 typename Field::Element_ptr A, 179#endif 180 const size_t lda, 181 typename Field::Element_ptr B, const size_t ldb); 182 183 /** @brief ftrmm: <b>TR</b>iangular <b>M</b>atrix <b>M</b>ultiply. 184 * Computes \f$ B \gets \alpha \mathrm{op}(A) B\f$ or \f$B \gets \alpha B \mathrm{op}(A)\f$. 185 * @param F field 186 * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A) B\f$ is computed. 187 * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular 188 * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$. 189 * \param Diag if \c Diag==FflasUnit then \p A is implicitly unit. 190 * \param M rows of \p B 191 * \param N cols of \p B 192 * @param alpha scalar 193 * \param A triangular matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$ 194 * @param lda leading dim of \p A 195 * @param B matrix of size \p MxN 196 * @param ldb leading dim of \p B 197 */ 198 template<class Field> 199 void 200 ftrmm (const Field& F, const FFLAS_SIDE Side, 201 const FFLAS_UPLO Uplo, 202 const FFLAS_TRANSPOSE TransA, 203 const FFLAS_DIAG Diag, 204 const size_t M, const size_t N, 205 const typename Field::Element alpha, 206 typename Field::ConstElement_ptr A, const size_t lda, 207 typename Field::Element_ptr B, const size_t ldb); 208 209 /** @brief ftrmm: <b>TR</b>iangular <b>M</b>atrix <b>M</b>ultiply with 3 operands 210 * Computes \f$ C \gets \alpha \mathrm{op}(A) B + beta C\f$ or \f$C \gets \alpha B \mathrm{op}(A) + beta C\f$. 211 * @param F field 212 * \param Side if \c Side==FflasLeft then \f$ B \gets \alpha \mathrm{op}(A) B\f$ is computed. 213 * \param Uplo if \c Uplo==FflasUpper then \p A is upper triangular 214 * \param TransA if \c TransA==FflasTrans then \f$\mathrm{op}(A)=A^t\f$. 215 * \param Diag if \c Diag==FflasUnit then \p A is implicitly unit. 216 * \param M rows of \p B 217 * \param N cols of \p B 218 * @param alpha scalar 219 * \param A triangular matrix. If \c Side==FflasLeft then \p A is \f$N\times N\f$, otherwise \p A is \f$M\times M\f$ 220 * @param lda leading dim of \p A 221 * @param B matrix of size \p MxN 222 * @param ldb leading dim of \p B 223 * @param beta scalar 224 * @param C matrix of size \p MxN 225 * @param ldc leading dim of \p C 226 */ 227 template<class Field> 228 void 229 ftrmm (const Field& F, const FFLAS_SIDE Side, 230 const FFLAS_UPLO Uplo, 231 const FFLAS_TRANSPOSE TransA, 232 const FFLAS_DIAG Diag, 233 const size_t M, const size_t N, 234 const typename Field::Element alpha, 235 typename Field::ConstElement_ptr A, const size_t lda, 236 typename Field::ConstElement_ptr B, const size_t ldb, 237 const typename Field::Element beta, 238 typename Field::Element_ptr C, const size_t ldc); 239 240 /** @brief fsyrk: Symmetric Rank K update 241 * 242 * Computes the Lower or Upper triangular part of \f$C = \alpha A \times A^T + \beta C\f$ or \f$C = \alpha A^T \times A + \beta C\f$ 243 * \param F field. 244 * \param UpLo whether to compute the upper or the lower triangular part of the symmetric matrix \p C 245 * \param trans if \c ta==FflasNoTrans then comput \f$C = \alpha A \times A^T + \beta C\f$, else \f$C = \alpha A^T \times A + \beta C\f$ 246 * \param n order of matrix \p C 247 * \param k see \p A 248 * \param alpha scalar 249 * \param A \f$A\f$ is \f$n \times k\f$ or \f$A\f$ is \f$k \times n\f$ 250 * \param lda leading dimension of \p A 251 * \param beta scalar 252 * \param C \f$C\f$ is \f$n \times n\f$ 253 * \param ldc leading dimension of \p C 254 * @warning \f$\alpha\f$ \e must be invertible 255 */ 256 template<class Field> 257 typename Field::Element_ptr 258 fsyrk (const Field& F, 259 const FFLAS_UPLO UpLo, 260 const FFLAS_TRANSPOSE trans, 261 const size_t n, 262 const size_t k, 263 const typename Field::Element alpha, 264 typename Field::ConstElement_ptr A, const size_t lda, 265 const typename Field::Element beta, 266 typename Field::Element_ptr C, const size_t ldc); 267 268 template<class Field> 269 typename Field::Element_ptr 270 fsyr2k (const Field& F, 271 const FFLAS_UPLO UpLo, 272 const FFLAS_TRANSPOSE trans, 273 const size_t n, 274 const size_t k, 275 const typename Field::Element alpha, 276 typename Field::ConstElement_ptr A, const size_t lda, 277 typename Field::ConstElement_ptr B, const size_t ldb, 278 const typename Field::Element beta, 279 typename Field::Element_ptr C, const size_t ldc); 280 281 /** @brief fsyrk: Symmetric Rank K update with diagonal scaling 282 * 283 * Computes the Lower or Upper triangular part of 284 * \f$C = \alpha A \times D \times A^T + \beta C\f$ or 285 * \f$C = \alpha A^T \times D \times A + \beta C\f$ where \p D is a diagonal matrix. 286 * Matrix \p A is updated into \f$ D\times A\f$ (if trans = FflasTrans) or 287 * \f$ A\times D\f$ (if trans = FflasNoTrans). 288 * \param F field. 289 * \param UpLo whether to compute the upper or the lower triangular part of the symmetric 290 * matrix \p C 291 * \param trans if \c ta==FflasNoTrans then compute \f$C = \alpha A \times A^T + \beta C\f$, 292 * else \f$C = \alpha A^T \times A + \beta C\f$ 293 * \param n order of matrix \p C 294 * \param k see \p A 295 * \param alpha scalar 296 * \param A \f$A\f$ is \f$n \times k\f$ or \f$A\f$ is \f$k \times n\f$ 297 * \param lda leading dimension of \p A 298 * \param D \f$D\f$ is \f$k \times k\f$ diagonal matrix, stored as a vector of k coefficients 299 * \param lda leading dimension of \p A 300 * \param beta scalar 301 * \param C \f$C\f$ is \f$n \times n\f$ 302 * \param ldc leading dimension of \p C 303 * @warning \f$\alpha\f$ \e must be invertible 304 */ 305 template<class Field> 306 typename Field::Element_ptr 307 fsyrk (const Field& F, 308 const FFLAS_UPLO UpLo, 309 const FFLAS_TRANSPOSE trans, 310 const size_t n, 311 const size_t k, 312 const typename Field::Element alpha, 313 typename Field::Element_ptr A, const size_t lda, 314 typename Field::ConstElement_ptr D, const size_t incD, 315 const typename Field::Element beta, 316 typename Field::Element_ptr C, const size_t ldc, const size_t threshold=__FFLASFFPACK_FSYRK_THRESHOLD); 317 template<class Field> 318 typename Field::Element_ptr 319 fsyrk (const Field& F, 320 const FFLAS_UPLO UpLo, 321 const FFLAS_TRANSPOSE trans, 322 const size_t n, 323 const size_t k, 324 const typename Field::Element alpha, 325 typename Field::Element_ptr A, const size_t lda, 326 typename Field::ConstElement_ptr D, const size_t incD, 327 const typename Field::Element beta, 328 typename Field::Element_ptr C, const size_t ldc, 329 const ParSeqHelper::Sequential seq, 330 const size_t threshold=__FFLASFFPACK_FSYRK_THRESHOLD); 331 template<class Field, class Cut, class Param> 332 typename Field::Element_ptr 333 fsyrk (const Field& F, 334 const FFLAS_UPLO UpLo, 335 const FFLAS_TRANSPOSE trans, 336 const size_t n, 337 const size_t k, 338 const typename Field::Element alpha, 339 typename Field::Element_ptr A, const size_t lda, 340 typename Field::ConstElement_ptr D, const size_t incD, 341 const typename Field::Element beta, 342 typename Field::Element_ptr C, const size_t ldc, 343 const ParSeqHelper::Parallel<Cut,Param> par, 344 const size_t threshold=__FFLASFFPACK_FSYRK_THRESHOLD); 345 /** @brief fsyrk: Symmetric Rank K update with diagonal scaling 346 * 347 * Computes the Lower or Upper triangular part of 348 * \f$C = \alpha A \times Delta D \times A^T + \beta C\f$ or 349 * \f$C = \alpha A^T \times Delta D \times A + \beta C\f$ where \p D is a diagonal matrix 350 * and \p Delta is a block diagonal with either 1 on the diagonal or 2x2 swap blocks 351 * Matrix \p A is updated into \f$ D\times A\f$ (if trans = FflasTrans) or 352 * \f$ A\times D\f$ (if trans = FflasNoTrans). 353 * \param F field. 354 * \param UpLo whether to compute the upper or the lower triangular part of the symmetric 355 * matrix \p C 356 * \param trans if \c ta==FflasNoTrans then compute \f$C = \alpha A Delta D \times A^T + \beta C\f$, 357 * else \f$C = \alpha A^T Delta D \times A + \beta C\f$ 358 * \param n see \p B 359 * \param k see \p A 360 * \param alpha scalar 361 * \param A \f$A\f$ is \f$n \times k\f$ or \f$A\f$ is \f$k \times n\f$ 362 * \param lda leading dimension of \p A 363 * \param D \f$D\f$ is \f$k \times k\f$ diagonal matrix, stored as a vector of k coefficients 364 * \param twoBlocks a vector boolean indicating the beginning of each 2x2 blocs in Delta 365 * \param lda leading dimension of \p A 366 * \param beta scalar 367 * \param C \f$C\f$ is \f$n \times n\f$ 368 * \param ldc leading dimension of \p C 369 * @warning \f$\alpha\f$ \e must be invertible 370 */ 371 template<class Field> 372 typename Field::Element_ptr 373 fsyrk (const Field& F, 374 const FFLAS_UPLO UpLo, 375 const FFLAS_TRANSPOSE trans, 376 const size_t n, 377 const size_t k, 378 const typename Field::Element alpha, 379 typename Field::Element_ptr A, const size_t lda, 380 typename Field::ConstElement_ptr D, const size_t incD, 381 const std::vector<bool>& twoBlock, 382 const typename Field::Element beta, 383 typename Field::Element_ptr C, const size_t ldc, const size_t threshold=__FFLASFFPACK_FSYRK_THRESHOLD); 384 385 /** @brief fsyr2k: Symmetric Rank 2K update 386 * 387 * Computes the Lower or Upper triangular part of \f$C = \alpha ( A \times B^T + B \times A^T) + \beta C\f$ or \f$C = \alpha ( A^T \times B + B^T \times A ) + \beta C\f$ 388 * \param F field. 389 * \param UpLo whether to compute the upper or the lower triangular part of the symmetric matrix \p C 390 * \param trans if \c ta==FflasNoTrans then compute \f$C = \alpha ( A \times B^T + B \times A^T ) + \beta C\f$, else \f$C = \alpha ( A^T \times B + B^T \times A) + \beta C\f$ 391 * \param n order of matrix \p C 392 * \param k see \p A 393 * \param alpha scalar 394 * \param A \f$A\f$ is \f$n \times k\f$ (FflasNoTrans) or \f$A\f$ is \f$k \times n\f$ (FflasTrans) 395 * \param lda leading dimension of \p A 396 * \param beta scalar 397 * \param C \f$C\f$ is \f$n \times n\f$ 398 * \param ldc leading dimension of \p C 399 * @warning \f$\alpha\f$ \e must be invertible 400 */ 401 template<class Field> 402 typename Field::Element_ptr 403 fsyr2k (const Field& F, 404 const FFLAS_UPLO UpLo, 405 const FFLAS_TRANSPOSE trans, 406 const size_t n, 407 const size_t k, 408 const typename Field::Element alpha, 409 typename Field::ConstElement_ptr A, const size_t lda, 410 typename Field::ConstElement_ptr B, const size_t ldb, 411 const typename Field::Element beta, 412 typename Field::Element_ptr C, const size_t ldc); 413 414 /** @brief fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply. 415 * 416 * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$ 417 * Automatically set Winograd recursion level 418 * \param F field. 419 * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$, 420 * \param tb same for matrix \p B 421 * \param m see \p A 422 * \param n see \p B 423 * \param k see \p A 424 * \param alpha scalar 425 * \param beta scalar 426 * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$ 427 * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$ 428 * \param C \f$C\f$ is \f$m \times n\f$ 429 * \param lda leading dimension of \p A 430 * \param ldb leading dimension of \p B 431 * \param ldc leading dimension of \p C 432 * \param w recursive levels of Winograd's algorithm are used. No argument (or -1) does auto computation of \p w. 433 * @warning \f$\alpha\f$ \e must be invertible 434 */ 435 template<class Field> 436 typename Field::Element_ptr 437 fgemm( const Field& F, 438 const FFLAS_TRANSPOSE ta, 439 const FFLAS_TRANSPOSE tb, 440 const size_t m, 441 const size_t n, 442 const size_t k, 443 const typename Field::Element alpha, 444 typename Field::ConstElement_ptr A, const size_t lda, 445 typename Field::ConstElement_ptr B, const size_t ldb, 446 const typename Field::Element beta, 447 typename Field::Element_ptr C, const size_t ldc); 448 449 template<typename Field> 450 typename Field::Element_ptr 451 fgemm( const Field& F, 452 const FFLAS_TRANSPOSE ta, 453 const FFLAS_TRANSPOSE tb, 454 const size_t m, 455 const size_t n, 456 const size_t k, 457 const typename Field::Element alpha, 458 typename Field::ConstElement_ptr A, const size_t lda, 459 typename Field::ConstElement_ptr B, const size_t ldb, 460 const typename Field::Element beta, 461 typename Field::Element_ptr C, const size_t ldc, 462 const ParSeqHelper::Sequential seq); 463 464 template<typename Field, class Cut, class Param> 465 typename Field::Element_ptr 466 fgemm( const Field& F, 467 const FFLAS_TRANSPOSE ta, 468 const FFLAS_TRANSPOSE tb, 469 const size_t m, 470 const size_t n, 471 const size_t k, 472 const typename Field::Element alpha, 473 typename Field::ConstElement_ptr A, const size_t lda, 474 typename Field::ConstElement_ptr B, const size_t ldb, 475 const typename Field::Element beta, 476 typename Field::Element_ptr C, const size_t ldc, 477 const ParSeqHelper::Parallel<Cut,Param> par); 478 479 template<typename Field> 480 typename Field::Element_ptr 481 pfgemm (const Field& F, 482 const FFLAS_TRANSPOSE ta, 483 const FFLAS_TRANSPOSE tb, 484 const size_t m, 485 const size_t n, 486 const size_t k, 487 const typename Field::Element alpha, 488 typename Field::ConstElement_ptr A, const size_t lda, 489 typename Field::ConstElement_ptr B, const size_t ldb, 490 const typename Field::Element beta, 491 typename Field::Element_ptr C, const size_t ldc, 492 size_t numthreads = 0){ 493 PAR_BLOCK{ 494 size_t nt = numthreads ? numthreads : NUM_THREADS; 495 ParSeqHelper::Parallel<CuttingStrategy::Block,StrategyParameter::Threads> par(nt); 496 fgemm(F,ta,tb,m,n,k,alpha,A,lda,B,ldb,beta,C,ldc,par); 497 } 498 } 499 500 template<class Field> 501 typename Field::Element* 502 pfgemm_1D_rec( const Field& F, 503 const FFLAS_TRANSPOSE ta, 504 const FFLAS_TRANSPOSE tb, 505 const size_t m, 506 const size_t n, 507 const size_t k, 508 const typename Field::Element alpha, 509 const typename Field::Element_ptr A, const size_t lda, 510 const typename Field::Element_ptr B, const size_t ldb, 511 const typename Field::Element beta, 512 typename Field::Element * C, const size_t ldc, size_t seuil); 513 514 template<class Field> 515 typename Field::Element* 516 pfgemm_2D_rec( const Field& F, 517 const FFLAS_TRANSPOSE ta, 518 const FFLAS_TRANSPOSE tb, 519 const size_t m, 520 const size_t n, 521 const size_t k, 522 const typename Field::Element alpha, 523 const typename Field::Element_ptr A, const size_t lda, 524 const typename Field::Element_ptr B, const size_t ldb, 525 const typename Field::Element beta, 526 typename Field::Element * C, const size_t ldc, size_t seuil); 527 528 template<class Field> 529 typename Field::Element* 530 pfgemm_3D_rec( const Field& F, 531 const FFLAS_TRANSPOSE ta, 532 const FFLAS_TRANSPOSE tb, 533 const size_t m, 534 const size_t n, 535 const size_t k, 536 const typename Field::Element alpha, 537 const typename Field::Element_ptr A, const size_t lda, 538 const typename Field::Element_ptr B, const size_t ldb, 539 const typename Field::Element beta, 540 typename Field::Element_ptr C, const size_t ldc, size_t seuil, size_t * x); 541 542 template<class Field> 543 typename Field::Element_ptr 544 pfgemm_3D_rec2( const Field& F, 545 const FFLAS_TRANSPOSE ta, 546 const FFLAS_TRANSPOSE tb, 547 const size_t m, 548 const size_t n, 549 const size_t k, 550 const typename Field::Element alpha, 551 const typename Field::Element_ptr A, const size_t lda, 552 const typename Field::Element_ptr B, const size_t ldb, 553 const typename Field::Element beta, 554 typename Field::Element_ptr C, const size_t ldc, size_t seuil, size_t *x); 555 556 /** @brief fgemm: <b>F</b>ield <b>GE</b>neral <b>M</b>atrix <b>M</b>ultiply. 557 * 558 * Computes \f$C = \alpha \mathrm{op}(A) \times \mathrm{op}(B) + \beta C\f$ 559 * Version with Helper. Input and Output are not supposed to be reduced. 560 * \param F field. 561 * \param ta if \c ta==FflasTrans then \f$\mathrm{op}(A)=A^t\f$, else \f$\mathrm{op}(A)=A\f$, 562 * \param tb same for matrix \p B 563 * \param m see \p A 564 * \param n see \p B 565 * \param k see \p A 566 * \param alpha scalar 567 * \param beta scalar 568 * \param A \f$\mathrm{op}(A)\f$ is \f$m \times k\f$ 569 * \param B \f$\mathrm{op}(B)\f$ is \f$k \times n\f$ 570 * \param C \f$C\f$ is \f$m \times n\f$ 571 * \param lda leading dimension of \p A 572 * \param ldb leading dimension of \p B 573 * \param ldc leading dimension of \p C 574 * \param H helper, driving the computation (algorithm, delayed modular reduction, switch of base type, etc) 575 * @warning \f$\alpha\f$ \e must be invertible 576 */ 577 // template<class Field, class AlgoT, class FieldTrait, class ParSeqTrait> 578 // inline typename Field::Element_ptr 579 // fgemm (const Field& F, 580 // const FFLAS_TRANSPOSE ta, 581 // const FFLAS_TRANSPOSE tb, 582 // const size_t m, const size_t n, const size_t k, 583 // const typename Field::Element alpha, 584 // typename Field::Element_ptr A, const size_t lda, 585 // typename Field::Element_ptr B, const size_t ldb, 586 // const typename Field::Element beta, 587 // typename Field::Element_ptr C, const size_t ldc, 588 // MMHelper<Field, AlgoT, FieldTrait, ParSeqTrait> & H); 589 590} // FFLAS 591 592#include "fflas-ffpack/paladin/parallel.h" 593 594namespace FFLAS { 595 596 /** @brief fsquare: Squares a matrix. 597 * compute \f$ C \gets \alpha \mathrm{op}(A) \mathrm{op}(A) + \beta C\f$ over a Field \p F 598 * Avoid the conversion of B 599 * @param ta if \c ta==FflasTrans, \f$\mathrm{op}(A)=A^T\f$. 600 * @param F field 601 * @param n size of \p A 602 * @param alpha scalar 603 * @param beta scalar 604 * @param A dense matrix of size \c nxn 605 * @param lda leading dimension of \p A 606 * @param C dense matrix of size \c nxn 607 * @param ldc leading dimension of \p C 608 */ 609 template<class Field> 610 typename Field::Element_ptr fsquare (const Field& F, 611 const FFLAS_TRANSPOSE ta, 612 const size_t n, 613 const typename Field::Element alpha, 614 typename Field::ConstElement_ptr A, 615 const size_t lda, 616 const typename Field::Element beta, 617 typename Field::Element_ptr C, 618 const size_t ldc); 619 620 621} // FFLAS 622 623#endif // __FFLASFFPACK_fflas_fflas_level3_INL 624/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 625// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s 626