1/*
2 * Copyright (C) 2014 the FFLAS-FFPACK group
3 *
4 * Written by Pascal Giorgi <pascal.giorgi@lirmm.fr>
5 *
6 *
7 * ========LICENCE========
8 * This file is part of the library FFLAS-FFPACK.
9 *
10 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
11 * it under the terms of the  GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23 * ========LICENCE========
24 *.
25 */
26
27#ifndef __FFPACK_pluq_mp_INL
28#define __FFPACK_pluq_mp_INL
29
30#ifdef BENCH_PERF_LQUP_MP
31#define BENCH_PERF_FGEMM_MP
32#endif
33#include "fflas-ffpack/field/rns-integer-mod.h"
34#include "fflas-ffpack/field/rns-integer.h"
35#include "fflas-ffpack/fflas-ffpack.h"
36
37#include "givaro/givinteger.h"
38#include "givaro/modular-integer.h"
39namespace FFPACK {
40
41    template <class Cut, class Param>
42    inline size_t
43    PLUQ (const Givaro::Modular<Givaro::Integer>& F,
44          const FFLAS::FFLAS_DIAG Diag,
45          const size_t M, const size_t N,
46          typename Givaro::Integer* A, const size_t lda,
47          size_t*P, size_t *Q, size_t BCThreshold, FFLAS::ParSeqHelper::Parallel<Cut,Param>& PSHelper)
48    {
49
50        const size_t K = std::max(M,N);
51        if (K) {
52#ifdef BENCH_PERF_LQUP_MP
53        double t_init=0, t_lqup=0, t_mod=0, t_rec=0;
54        FFLAS::Timer chrono;
55        chrono.start();
56#endif
57        Givaro::Integer p;
58        F.cardinality(p);
59        size_t logp=p.bitsize();
60
61        // compute bit size of feasible prime
62        size_t _k=std::max(K,logp/20), lk=0;
63        while ( _k ) {_k>>=1; ++lk;}
64        size_t prime_bitsize= (53-lk)>>1;
65
66        // construct rns basis
67        Givaro::Integer maxC= (p-1)*(p-1)*(p-1)*uint64_t(K);
68        uint64_t n_pr =uint64_t(ceil(double(maxC.bitsize())/double(prime_bitsize)));
69        maxC=(p-1)*(p-1)*uint64_t(K)*(1<<prime_bitsize)*n_pr;
70
71        FFPACK::rns_double RNS(maxC, prime_bitsize, true);
72        FFPACK::RNSIntegerMod<FFPACK::rns_double> Zp(p, RNS);
73#ifdef BENCH_PERF_LQUP_MP
74        chrono.stop();
75        t_init+=chrono.usertime();
76        chrono.clear();chrono.start();
77#endif
78        // compute A in RNS
79        FFPACK::rns_double::Element_ptr Ap;
80        Ap = FFLAS::fflas_new(Zp,M,N);
81        FFLAS::finit_rns(Zp,M,N,(logp/16)+(logp%16?1:0),A,lda,Ap);
82
83
84#ifdef BENCH_PERF_LQUP_MP
85        chrono.stop();
86        t_mod+=chrono.usertime();
87        chrono.clear();chrono.start();
88#endif
89        // call lqup in rns
90        size_t R=FFPACK::PLUQ(Zp, Diag, M, N, Ap, N, P, Q, BCThreshold, PSHelper);
91#ifdef BENCH_PERF_LQUP_MP
92        chrono.stop();
93        t_lqup+=chrono.usertime();
94        chrono.clear();chrono.start();
95#endif
96        //Zp.write(std::cout,*Ap);
97        // reconstruct the result
98        FFLAS::fconvert_rns(Zp,M,N,F.zero,A,lda,Ap);
99#ifdef BENCH_PERF_LQUP_MP
100        chrono.stop();
101        t_rec+=chrono.usertime();
102        chrono.clear();chrono.start();
103#endif
104        // reduce it modulo p
105        FFLAS::freduce (F,M,N,A,lda);
106        //F.write(std::cout,*A);
107
108#ifdef BENCH_PERF_LQUP_MP
109        chrono.stop();
110        //t_rec+=chrono.usertime();
111        std::cout<<"PLUQ RNS PERF:"<<std::endl;
112        std::cout<<"  ---  RNS basis size: "<<Zp.size() <<std::endl;
113        std::cout<<"  ***      init  : "<<t_init<<std::endl;
114        std::cout<<"  ***  rns  mod  : "<<t_mod<<std::endl;
115        std::cout<<"  ***  rns lqup  : "<<t_lqup<<" ( igemm="<<Zp.t_igemm<<" ftrsm="<<Zp.t_trsm<<" scal="<<Zp.t_scal
116        <<" modp="<<Zp.t_modp<<std::endl;
117        std::cout<<"  ***  rns  rec  : "<<t_rec<<std::endl;
118        std::cout<<"  ***       mod  : "<<chrono.usertime()<<std::endl;
119
120#endif
121        FFLAS::fflas_delete(Ap);
122        return R;
123        } else {
124            return 0;
125        }
126    }
127
128} // namespace FFPACK
129
130#endif
131
132/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
133// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
134