1/* 2 * Copyright (C) 2014 the FFLAS-FFPACK group 3 * 4 * Written by Pascal Giorgi <pascal.giorgi@lirmm.fr> 5 * 6 * 7 * ========LICENCE======== 8 * This file is part of the library FFLAS-FFPACK. 9 * 10 * FFLAS-FFPACK is free software: you can redistribute it and/or modify 11 * it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with this library; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 * ========LICENCE======== 24 *. 25 */ 26/** @file fflas/fflas_ftrsm_mp.inl 27 * @brief triangular system with matrix right hand side over multiprecision domain (either over Z or over Z/pZ) 28 */ 29#ifndef __FFPACK_ftrsm_mp_INL 30#define __FFPACK_ftrsm_mp_INL 31 32#include <cmath> 33#include <givaro/modular-integer.h> 34#include <givaro/givinteger.h> 35 36#include "fflas-ffpack/fflas/fflas_bounds.inl" 37#include "fflas-ffpack/fflas/fflas_level3.inl" 38#include "fflas-ffpack/field/rns-integer-mod.h" 39#include "fflas-ffpack/field/rns-integer.h" 40 41namespace FFLAS { 42 43 44 inline void ftrsm (const Givaro::Modular<Givaro::Integer> & F, 45 const FFLAS_SIDE Side, 46 const FFLAS_UPLO Uplo, 47 const FFLAS_TRANSPOSE TransA, 48 const FFLAS_DIAG Diag, 49 const size_t M, const size_t N, 50 const Givaro::Integer alpha, 51 const Givaro::Integer * A, const size_t lda, 52 Givaro::Integer * B, const size_t ldb){ 53 54 55#ifdef BENCH_PERF_TRSM_MP 56 double t_init=0, t_trsm=0, t_mod=0, t_rec=0; 57 FFLAS::Timer chrono; 58 chrono.start(); 59#endif 60 Givaro::Integer p; 61 F.cardinality(p); 62 size_t logp=p.bitsize(); 63 size_t K; 64 if (Side == FFLAS::FflasLeft) 65 K=M; 66 else 67 K=N; 68 69 if (K==0) return; 70 71 // compute bit size of feasible prime 72 size_t _k=std::max(K,logp/20), lk=0; 73 while ( _k ) {_k>>=1; ++lk;} 74 size_t prime_bitsize= (53-lk)>>1; 75 76 // construct rns basis 77 Givaro::Integer maxC= 4*p*p*uint64_t(K); 78 FFPACK::rns_double RNS(maxC, prime_bitsize, true); 79 FFPACK::RNSIntegerMod<FFPACK::rns_double> Zp(p, RNS); 80#ifdef BENCH_PERF_TRSM_MP 81 chrono.stop(); 82 t_init+=chrono.usertime(); 83 chrono.clear();chrono.start(); 84#endif 85 // compute A and B in RNS 86 FFPACK::rns_double::Element_ptr Ap,Bp; 87 Ap = FFLAS::fflas_new(Zp,K,K); 88 Bp = FFLAS::fflas_new(Zp,M,N); 89 90 if (Side == FFLAS::FflasLeft){ 91 finit_rns(Zp,K,K,(logp/16)+(logp%16?1:0),A,lda,Ap); 92 finit_rns(Zp,M,N,(logp/16)+(logp%16?1:0),B,ldb,Bp); 93 } 94 else { 95 finit_trans_rns(Zp,K,K,(logp/16)+(logp%16?1:0),A,lda,Ap); 96 finit_trans_rns(Zp,M,N,(logp/16)+(logp%16?1:0),B,ldb,Bp); 97 } 98#ifdef BENCH_PERF_TRSM_MP 99 chrono.stop(); 100 t_mod+=chrono.usertime(); 101 chrono.clear();chrono.start(); 102#endif 103 104 // call ftrsm in rns 105 if (Side == FFLAS::FflasLeft) 106 ftrsm(Zp, Side, Uplo, TransA, Diag, M, N, Zp.one, Ap, K, Bp, N); 107 else { 108 if (Uplo == FFLAS::FflasUpper) 109 ftrsm(Zp, FFLAS::FflasLeft, FFLAS::FflasLower, TransA, Diag, N, M, Zp.one, Ap, K, Bp, M); 110 else 111 ftrsm(Zp, FFLAS::FflasLeft, FFLAS::FflasUpper, TransA, Diag, N, M, Zp.one, Ap, K, Bp, M); 112 } 113#ifdef BENCH_PERF_TRSM_MP 114 chrono.stop(); 115 t_trsm+=chrono.usertime(); 116 chrono.clear();chrono.start(); 117#endif 118 // reconstruct the result 119 if (Side == FFLAS::FflasLeft) 120 fconvert_rns(Zp,M,N,F.zero,B,ldb,Bp); 121 else{ 122 fconvert_trans_rns(Zp,M,N,F.zero,B,ldb,Bp); 123 } 124 125 // reduce it modulo p 126 freduce (F, M, N, B, ldb); 127 // scale it with alpha 128 if (!F.isOne(alpha)) 129 fscalin(F,M,N,alpha,B,ldb); 130 131#ifdef BENCH_PERF_TRSM_MP 132 chrono.stop(); 133 t_rec+=chrono.usertime(); 134 cout<<"FTRSM RNS PERF:"<<endl; 135 cout<<" *** init : "<<t_init<<endl; 136 cout<<" *** rns mod : "<<t_mod<<endl; 137 cout<<" *** rns trsm : "<<t_trsm<<" ( igemm="<<Zp.t_igemm<<" scal="<<Zp.t_scal<<" modp="<<Zp.t_modp<<endl;; 138 cout<<" *** rns rec : "<<t_rec<<endl; 139#endif 140 141 FFLAS::fflas_delete(Ap); 142 FFLAS::fflas_delete(Bp); 143 } 144 145 /* bb: do not use CBLAS_ORDER, or make it compatible with MKL */ 146 147 inline void cblas_imptrsm(const enum FFLAS_ORDER Order, 148 const enum FFLAS_SIDE Side, 149 const enum FFLAS_UPLO Uplo, 150 const enum FFLAS_TRANSPOSE TransA, 151 const enum FFLAS_DIAG Diag, 152 const int M, const int N, const FFPACK::rns_double_elt alpha, 153 FFPACK::rns_double_elt_cstptr A, const int lda, 154 FFPACK::rns_double_elt_ptr B, const int ldb) {} 155 156#ifndef DOXYGEN_SHOULD_SKIP_THIS 157 namespace Protected { 158 159 template<> 160 inline size_t TRSMBound (const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F) 161 { 162 return 1; 163 } 164 165 template <> 166 inline size_t DotProdBoundClassic (const FFPACK::RNSIntegerMod<FFPACK::rns_double>& F, 167 const FFPACK::rns_double_elt& beta) 168 { 169 Givaro::Integer p,b,M; 170 F.cardinality(p); 171 p--; 172 F.convert(b,beta); 173 M=F.rns()._M; 174 uint64_t kmax= (M-b*p)/(p*p); 175 return (size_t)std::max(uint64_t(1),kmax); 176 } 177 178#ifndef __FTRSM_MP_FAST 179#define __FFLAS_MULTIPRECISION 180 181#define __FFLAS__LEFT 182#define __FFLAS__UP 183#define __FFLAS__NOTRANSPOSE 184#define __FFLAS__NONUNIT 185#include "fflas_ftrsm_src.inl" 186#undef __FFLAS__LEFT 187#undef __FFLAS__UP 188#undef __FFLAS__NOTRANSPOSE 189#undef __FFLAS__NONUNIT 190 191 192 193#define __FFLAS__LEFT 194#define __FFLAS__UP 195#define __FFLAS__NOTRANSPOSE 196#define __FFLAS__UNIT 197#include "fflas_ftrsm_src.inl" 198#undef __FFLAS__LEFT 199#undef __FFLAS__UP 200#undef __FFLAS__NOTRANSPOSE 201#undef __FFLAS__UNIT 202 203#define __FFLAS__LEFT 204#define __FFLAS__UP 205#define __FFLAS__TRANSPOSE 206#define __FFLAS__NONUNIT 207#include "fflas_ftrsm_src.inl" 208#undef __FFLAS__LEFT 209#undef __FFLAS__UP 210#undef __FFLAS__TRANSPOSE 211#undef __FFLAS__NONUNIT 212 213#define __FFLAS__LEFT 214#define __FFLAS__UP 215#define __FFLAS__TRANSPOSE 216#define __FFLAS__UNIT 217#include "fflas_ftrsm_src.inl" 218#undef __FFLAS__LEFT 219#undef __FFLAS__UP 220#undef __FFLAS__TRANSPOSE 221#undef __FFLAS__UNIT 222 223 224#define __FFLAS__LEFT 225#define __FFLAS__LOW 226#define __FFLAS__NOTRANSPOSE 227#define __FFLAS__NONUNIT 228#include "fflas_ftrsm_src.inl" 229#undef __FFLAS__LEFT 230#undef __FFLAS__LOW 231#undef __FFLAS__NOTRANSPOSE 232#undef __FFLAS__NONUNIT 233 234#define __FFLAS__LEFT 235#define __FFLAS__LOW 236#define __FFLAS__NOTRANSPOSE 237#define __FFLAS__UNIT 238#include "fflas_ftrsm_src.inl" 239#undef __FFLAS__LEFT 240#undef __FFLAS__LOW 241#undef __FFLAS__NOTRANSPOSE 242#undef __FFLAS__UNIT 243 244#define __FFLAS__LEFT 245#define __FFLAS__LOW 246#define __FFLAS__TRANSPOSE 247#define __FFLAS__NONUNIT 248#include "fflas_ftrsm_src.inl" 249#undef __FFLAS__LEFT 250#undef __FFLAS__LOW 251#undef __FFLAS__TRANSPOSE 252#undef __FFLAS__NONUNIT 253 254#define __FFLAS__LEFT 255#define __FFLAS__LOW 256#define __FFLAS__TRANSPOSE 257#define __FFLAS__UNIT 258#include "fflas_ftrsm_src.inl" 259#undef __FFLAS__LEFT 260#undef __FFLAS__LOW 261#undef __FFLAS__TRANSPOSE 262#undef __FFLAS__UNIT 263 264#define __FFLAS__RIGHT 265#define __FFLAS__UP 266#define __FFLAS__NOTRANSPOSE 267#define __FFLAS__NONUNIT 268#include "fflas_ftrsm_src.inl" 269#undef __FFLAS__RIGHT 270#undef __FFLAS__UP 271#undef __FFLAS__NOTRANSPOSE 272#undef __FFLAS__NONUNIT 273 274#define __FFLAS__RIGHT 275#define __FFLAS__UP 276#define __FFLAS__NOTRANSPOSE 277#define __FFLAS__UNIT 278#include "fflas_ftrsm_src.inl" 279#undef __FFLAS__RIGHT 280#undef __FFLAS__UP 281#undef __FFLAS__NOTRANSPOSE 282#undef __FFLAS__UNIT 283 284#define __FFLAS__RIGHT 285#define __FFLAS__UP 286#define __FFLAS__TRANSPOSE 287#define __FFLAS__NONUNIT 288#include "fflas_ftrsm_src.inl" 289#undef __FFLAS__RIGHT 290#undef __FFLAS__UP 291#undef __FFLAS__TRANSPOSE 292#undef __FFLAS__NONUNIT 293 294#define __FFLAS__RIGHT 295#define __FFLAS__UP 296#define __FFLAS__TRANSPOSE 297#define __FFLAS__UNIT 298#include "fflas_ftrsm_src.inl" 299#undef __FFLAS__RIGHT 300#undef __FFLAS__UP 301#undef __FFLAS__TRANSPOSE 302#undef __FFLAS__UNIT 303 304 305#define __FFLAS__RIGHT 306#define __FFLAS__LOW 307#define __FFLAS__NOTRANSPOSE 308#define __FFLAS__NONUNIT 309#include "fflas_ftrsm_src.inl" 310#undef __FFLAS__RIGHT 311#undef __FFLAS__LOW 312#undef __FFLAS__NOTRANSPOSE 313#undef __FFLAS__NONUNIT 314 315#define __FFLAS__RIGHT 316#define __FFLAS__LOW 317#define __FFLAS__NOTRANSPOSE 318#define __FFLAS__UNIT 319#include "fflas_ftrsm_src.inl" 320#undef __FFLAS__RIGHT 321#undef __FFLAS__LOW 322#undef __FFLAS__NOTRANSPOSE 323#undef __FFLAS__UNIT 324 325#define __FFLAS__RIGHT 326#define __FFLAS__LOW 327#define __FFLAS__TRANSPOSE 328#define __FFLAS__NONUNIT 329#include "fflas_ftrsm_src.inl" 330#undef __FFLAS__RIGHT 331#undef __FFLAS__LOW 332#undef __FFLAS__TRANSPOSE 333#undef __FFLAS__NONUNIT 334 335#define __FFLAS__RIGHT 336#define __FFLAS__LOW 337#define __FFLAS__TRANSPOSE 338#define __FFLAS__UNIT 339#include "fflas_ftrsm_src.inl" 340#undef __FFLAS__RIGHT 341#undef __FFLAS__LOW 342#undef __FFLAS__TRANSPOSE 343#undef __FFLAS__UNIT 344#endif // #ifdef __FTRSM_MP_FAST 345 346 } // end of namespace protected 347#endif // #ifndef DOXYGEN_SHOULD_SKIP_THIS 348} // END OF NAMESPACE FFLAS 349 350 351 352 353#endif 354 355/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 356// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s 357