1/*
2 * Copyright (C) 2014 the FFLAS-FFPACK group
3 *
4 * Written by Pascal Giorgi <pascal.giorgi@lirmm.fr>
5 *
6 *
7 * ========LICENCE========
8 * This file is part of the library FFLAS-FFPACK.
9 *
10 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
11 * it under the terms of the  GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23 * ========LICENCE========
24 *.
25 */
26/** @file fflas/fflas_ftrsm_mp.inl
27 * @brief triangular system with matrix right hand side over multiprecision domain (either over Z or over Z/pZ)
28 */
29#ifndef __FFPACK_ftrsm_mp_INL
30#define __FFPACK_ftrsm_mp_INL
31
32#include <cmath>
33#include <givaro/modular-integer.h>
34#include <givaro/givinteger.h>
35
36#include "fflas-ffpack/fflas/fflas_bounds.inl"
37#include "fflas-ffpack/fflas/fflas_level3.inl"
38#include "fflas-ffpack/field/rns-integer-mod.h"
39#include "fflas-ffpack/field/rns-integer.h"
40
41namespace FFLAS {
42
43
44    inline void ftrsm (const Givaro::Modular<Givaro::Integer> & F,
45                       const FFLAS_SIDE Side,
46                       const FFLAS_UPLO Uplo,
47                       const FFLAS_TRANSPOSE TransA,
48                       const FFLAS_DIAG Diag,
49                       const size_t M, const size_t N,
50                       const Givaro::Integer alpha,
51                       const Givaro::Integer * A, const size_t lda,
52                       Givaro::Integer * B, const size_t ldb){
53
54
55#ifdef BENCH_PERF_TRSM_MP
56        double t_init=0, t_trsm=0, t_mod=0, t_rec=0;
57        FFLAS::Timer chrono;
58        chrono.start();
59#endif
60        Givaro::Integer p;
61        F.cardinality(p);
62        size_t logp=p.bitsize();
63        size_t K;
64        if (Side == FFLAS::FflasLeft)
65            K=M;
66        else
67            K=N;
68
69        if (K==0) return;
70
71        // compute bit size of feasible prime
72        size_t _k=std::max(K,logp/20), lk=0;
73        while ( _k ) {_k>>=1; ++lk;}
74        size_t prime_bitsize= (53-lk)>>1;
75
76        // construct rns basis
77        Givaro::Integer maxC= 4*p*p*uint64_t(K);
78        FFPACK::rns_double RNS(maxC, prime_bitsize, true);
79        FFPACK::RNSIntegerMod<FFPACK::rns_double> Zp(p, RNS);
80#ifdef BENCH_PERF_TRSM_MP
81        chrono.stop();
82        t_init+=chrono.usertime();
83        chrono.clear();chrono.start();
84#endif
85        // compute A and B in RNS
86        FFPACK::rns_double::Element_ptr Ap,Bp;
87        Ap = FFLAS::fflas_new(Zp,K,K);
88        Bp = FFLAS::fflas_new(Zp,M,N);
89
90        if (Side == FFLAS::FflasLeft){
91            finit_rns(Zp,K,K,(logp/16)+(logp%16?1:0),A,lda,Ap);
92            finit_rns(Zp,M,N,(logp/16)+(logp%16?1:0),B,ldb,Bp);
93        }
94        else {
95            finit_trans_rns(Zp,K,K,(logp/16)+(logp%16?1:0),A,lda,Ap);
96            finit_trans_rns(Zp,M,N,(logp/16)+(logp%16?1:0),B,ldb,Bp);
97        }
98#ifdef BENCH_PERF_TRSM_MP
99        chrono.stop();
100        t_mod+=chrono.usertime();
101        chrono.clear();chrono.start();
102#endif
103
104        // call ftrsm in rns
105        if (Side == FFLAS::FflasLeft)
106            ftrsm(Zp, Side, Uplo, TransA, Diag, M, N, Zp.one, Ap, K, Bp, N);
107        else {
108            if (Uplo == FFLAS::FflasUpper)
109                ftrsm(Zp, FFLAS::FflasLeft, FFLAS::FflasLower, TransA, Diag, N, M, Zp.one, Ap, K, Bp, M);
110            else
111                ftrsm(Zp, FFLAS::FflasLeft, FFLAS::FflasUpper, TransA, Diag, N, M, Zp.one, Ap, K, Bp, M);
112        }
113#ifdef BENCH_PERF_TRSM_MP
114        chrono.stop();
115        t_trsm+=chrono.usertime();
116        chrono.clear();chrono.start();
117#endif
118        // reconstruct the result
119        if (Side == FFLAS::FflasLeft)
120            fconvert_rns(Zp,M,N,F.zero,B,ldb,Bp);
121        else{
122            fconvert_trans_rns(Zp,M,N,F.zero,B,ldb,Bp);
123        }
124
125        // reduce it modulo p
126        freduce (F, M, N, B, ldb);
127        // scale it with alpha
128        if (!F.isOne(alpha))
129            fscalin(F,M,N,alpha,B,ldb);
130
131#ifdef BENCH_PERF_TRSM_MP
132        chrono.stop();
133        t_rec+=chrono.usertime();
134        cout<<"FTRSM RNS PERF:"<<endl;
135        cout<<"  ***      init  : "<<t_init<<endl;
136        cout<<"  ***  rns  mod  : "<<t_mod<<endl;
137        cout<<"  ***  rns trsm  : "<<t_trsm<<" ( igemm="<<Zp.t_igemm<<" scal="<<Zp.t_scal<<" modp="<<Zp.t_modp<<endl;;
138        cout<<"  ***  rns  rec  : "<<t_rec<<endl;
139#endif
140
141        FFLAS::fflas_delete(Ap);
142        FFLAS::fflas_delete(Bp);
143    }
144
145    /*  bb: do not use CBLAS_ORDER, or make it compatible with MKL */
146
147    inline void cblas_imptrsm(const enum FFLAS_ORDER Order,
148                              const enum FFLAS_SIDE Side,
149                              const enum FFLAS_UPLO Uplo,
150                              const enum FFLAS_TRANSPOSE TransA,
151                              const enum FFLAS_DIAG Diag,
152                              const int M, const int N, const FFPACK::rns_double_elt alpha,
153                              FFPACK::rns_double_elt_cstptr A, const int lda,
154                              FFPACK::rns_double_elt_ptr B, const int ldb) {}
155
156#ifndef DOXYGEN_SHOULD_SKIP_THIS
157    namespace Protected {
158
159        template<>
160        inline size_t TRSMBound (const FFPACK::RNSIntegerMod<FFPACK::rns_double> &F)
161        {
162            return 1;
163        }
164
165        template <>
166        inline size_t DotProdBoundClassic (const FFPACK::RNSIntegerMod<FFPACK::rns_double>& F,
167                                           const FFPACK::rns_double_elt& beta)
168        {
169            Givaro::Integer p,b,M;
170            F.cardinality(p);
171            p--;
172            F.convert(b,beta);
173            M=F.rns()._M;
174            uint64_t kmax= (M-b*p)/(p*p);
175            return  (size_t)std::max(uint64_t(1),kmax);
176        }
177
178#ifndef __FTRSM_MP_FAST
179#define __FFLAS_MULTIPRECISION
180
181#define __FFLAS__LEFT
182#define __FFLAS__UP
183#define __FFLAS__NOTRANSPOSE
184#define __FFLAS__NONUNIT
185#include "fflas_ftrsm_src.inl"
186#undef __FFLAS__LEFT
187#undef __FFLAS__UP
188#undef __FFLAS__NOTRANSPOSE
189#undef __FFLAS__NONUNIT
190
191
192
193#define __FFLAS__LEFT
194#define __FFLAS__UP
195#define __FFLAS__NOTRANSPOSE
196#define __FFLAS__UNIT
197#include "fflas_ftrsm_src.inl"
198#undef __FFLAS__LEFT
199#undef __FFLAS__UP
200#undef __FFLAS__NOTRANSPOSE
201#undef __FFLAS__UNIT
202
203#define __FFLAS__LEFT
204#define __FFLAS__UP
205#define __FFLAS__TRANSPOSE
206#define __FFLAS__NONUNIT
207#include "fflas_ftrsm_src.inl"
208#undef __FFLAS__LEFT
209#undef __FFLAS__UP
210#undef __FFLAS__TRANSPOSE
211#undef __FFLAS__NONUNIT
212
213#define __FFLAS__LEFT
214#define __FFLAS__UP
215#define __FFLAS__TRANSPOSE
216#define __FFLAS__UNIT
217#include "fflas_ftrsm_src.inl"
218#undef __FFLAS__LEFT
219#undef __FFLAS__UP
220#undef __FFLAS__TRANSPOSE
221#undef __FFLAS__UNIT
222
223
224#define __FFLAS__LEFT
225#define __FFLAS__LOW
226#define __FFLAS__NOTRANSPOSE
227#define __FFLAS__NONUNIT
228#include "fflas_ftrsm_src.inl"
229#undef __FFLAS__LEFT
230#undef __FFLAS__LOW
231#undef __FFLAS__NOTRANSPOSE
232#undef __FFLAS__NONUNIT
233
234#define __FFLAS__LEFT
235#define __FFLAS__LOW
236#define __FFLAS__NOTRANSPOSE
237#define __FFLAS__UNIT
238#include "fflas_ftrsm_src.inl"
239#undef __FFLAS__LEFT
240#undef __FFLAS__LOW
241#undef __FFLAS__NOTRANSPOSE
242#undef __FFLAS__UNIT
243
244#define __FFLAS__LEFT
245#define __FFLAS__LOW
246#define __FFLAS__TRANSPOSE
247#define __FFLAS__NONUNIT
248#include "fflas_ftrsm_src.inl"
249#undef __FFLAS__LEFT
250#undef __FFLAS__LOW
251#undef __FFLAS__TRANSPOSE
252#undef __FFLAS__NONUNIT
253
254#define __FFLAS__LEFT
255#define __FFLAS__LOW
256#define __FFLAS__TRANSPOSE
257#define __FFLAS__UNIT
258#include "fflas_ftrsm_src.inl"
259#undef __FFLAS__LEFT
260#undef __FFLAS__LOW
261#undef __FFLAS__TRANSPOSE
262#undef __FFLAS__UNIT
263
264#define __FFLAS__RIGHT
265#define __FFLAS__UP
266#define __FFLAS__NOTRANSPOSE
267#define __FFLAS__NONUNIT
268#include "fflas_ftrsm_src.inl"
269#undef __FFLAS__RIGHT
270#undef __FFLAS__UP
271#undef __FFLAS__NOTRANSPOSE
272#undef __FFLAS__NONUNIT
273
274#define __FFLAS__RIGHT
275#define __FFLAS__UP
276#define __FFLAS__NOTRANSPOSE
277#define __FFLAS__UNIT
278#include "fflas_ftrsm_src.inl"
279#undef __FFLAS__RIGHT
280#undef __FFLAS__UP
281#undef __FFLAS__NOTRANSPOSE
282#undef __FFLAS__UNIT
283
284#define __FFLAS__RIGHT
285#define __FFLAS__UP
286#define __FFLAS__TRANSPOSE
287#define __FFLAS__NONUNIT
288#include "fflas_ftrsm_src.inl"
289#undef __FFLAS__RIGHT
290#undef __FFLAS__UP
291#undef __FFLAS__TRANSPOSE
292#undef __FFLAS__NONUNIT
293
294#define __FFLAS__RIGHT
295#define __FFLAS__UP
296#define __FFLAS__TRANSPOSE
297#define __FFLAS__UNIT
298#include "fflas_ftrsm_src.inl"
299#undef __FFLAS__RIGHT
300#undef __FFLAS__UP
301#undef __FFLAS__TRANSPOSE
302#undef __FFLAS__UNIT
303
304
305#define __FFLAS__RIGHT
306#define __FFLAS__LOW
307#define __FFLAS__NOTRANSPOSE
308#define __FFLAS__NONUNIT
309#include "fflas_ftrsm_src.inl"
310#undef __FFLAS__RIGHT
311#undef __FFLAS__LOW
312#undef __FFLAS__NOTRANSPOSE
313#undef __FFLAS__NONUNIT
314
315#define __FFLAS__RIGHT
316#define __FFLAS__LOW
317#define __FFLAS__NOTRANSPOSE
318#define __FFLAS__UNIT
319#include "fflas_ftrsm_src.inl"
320#undef __FFLAS__RIGHT
321#undef __FFLAS__LOW
322#undef __FFLAS__NOTRANSPOSE
323#undef __FFLAS__UNIT
324
325#define __FFLAS__RIGHT
326#define __FFLAS__LOW
327#define __FFLAS__TRANSPOSE
328#define __FFLAS__NONUNIT
329#include "fflas_ftrsm_src.inl"
330#undef __FFLAS__RIGHT
331#undef __FFLAS__LOW
332#undef __FFLAS__TRANSPOSE
333#undef __FFLAS__NONUNIT
334
335#define __FFLAS__RIGHT
336#define __FFLAS__LOW
337#define __FFLAS__TRANSPOSE
338#define __FFLAS__UNIT
339#include "fflas_ftrsm_src.inl"
340#undef __FFLAS__RIGHT
341#undef __FFLAS__LOW
342#undef __FFLAS__TRANSPOSE
343#undef __FFLAS__UNIT
344#endif // #ifdef __FTRSM_MP_FAST
345
346    } // end of namespace protected
347#endif // #ifndef DOXYGEN_SHOULD_SKIP_THIS
348} // END OF NAMESPACE FFLAS
349
350
351
352
353#endif
354
355/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
356// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
357