1/*
2 * Copyright (C) 2014 the FFLAS-FFPACK group
3 *
4 * Written by Pascal Giorgi <pascal.giorgi@lirmm.fr>
5 *
6 *
7 * ========LICENCE========
8 * This file is part of the library FFLAS-FFPACK.
9 *
10 * FFLAS-FFPACK is free software: you can redistribute it and/or modify
11 * it under the terms of the  GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23 * ========LICENCE========
24 *.
25 */
26
27#ifndef __FFPACK_ludivine_mp_INL
28#define __FFPACK_ludivine_mp_INL
29
30#include <givaro/modular-integer.h>
31#include <givaro/givinteger.h>
32
33#ifdef BENCH_PERF_LQUP_MP
34#define BENCH_PERF_FGEMM_MP
35#endif
36
37#include "fflas-ffpack/field/rns-integer-mod.h"
38#include "fflas-ffpack/field/rns-integer.h"
39#include "fflas-ffpack/fflas/fflas.h"
40#include "fflas-ffpack/ffpack/ffpack_ludivine.inl"
41
42namespace FFPACK {
43
44    template <>
45    inline size_t
46    LUdivine (const Givaro::Modular<Givaro::Integer>& F,
47              const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans,
48              const size_t M, const size_t N,
49              typename Givaro::Integer* A, const size_t lda,
50              size_t*P, size_t *Q,
51              const FFPACK::FFPACK_LU_TAG LuTag,
52              const size_t cutoff
53             )
54    {
55        const size_t K = std::max(M,N);
56        if (K) {
57#ifdef BENCH_PERF_LQUP_MP
58        double t_init=0, t_lqup=0, t_mod=0, t_rec=0;
59        FFLAS::Timer chrono;
60        chrono.start();
61#endif
62        Givaro::Integer p;
63        F.cardinality(p);
64        size_t logp=p.bitsize();
65
66        // compute bit size of feasible prime
67        size_t _k=std::max(K+1,logp/20), lk=0;
68        while ( _k ) {_k>>=1; ++lk;}
69        size_t prime_bitsize= (53-lk)>>1;
70
71        // construct rns basis
72        Givaro::Integer maxC= (p-1)*(p-1)*(p-1)*uint64_t(K);
73        uint64_t n_pr =uint64_t(ceil(double(maxC.bitsize())/double(prime_bitsize)));
74        maxC=(p-1)*(p-1)*uint64_t(K)*(1<<prime_bitsize)*n_pr;
75
76
77        FFPACK::rns_double RNS(maxC, prime_bitsize, true);
78        FFPACK::RNSIntegerMod<FFPACK::rns_double> Zp(p, RNS);
79#ifdef BENCH_PERF_LQUP_MP
80        chrono.stop();
81        t_init+=chrono.usertime();
82        chrono.clear();chrono.start();
83#endif
84        // compute A in RNS
85        FFPACK::rns_double::Element_ptr Ap;
86        Ap = FFLAS::fflas_new(Zp,M,N);
87        FFLAS::finit_rns(Zp,M,N,(logp/16)+(logp%16?1:0),A,lda,Ap);
88
89
90#ifdef BENCH_PERF_LQUP_MP
91        chrono.stop();
92        t_mod+=chrono.usertime();
93        chrono.clear();chrono.start();
94#endif
95
96        // call lqup in rns
97        size_t R=FFPACK::LUdivine(Zp, Diag, trans, M, N, Ap, N, P, Q, LuTag, cutoff);
98
99        //std::cout<<"LUDivine RNS done"<<std::endl;
100#ifdef BENCH_PERF_LQUP_MP
101        chrono.stop();
102        t_lqup+=chrono.usertime();
103        chrono.clear();chrono.start();
104#endif
105        // reconstruct the result
106        FFLAS::fconvert_rns(Zp,M,N,F.zero,A,lda,Ap);
107#ifdef BENCH_PERF_LQUP_MP
108        chrono.stop();
109        t_rec+=chrono.usertime();
110        chrono.clear();chrono.start();
111#endif
112        // reduce it modulo p
113        FFLAS::freduce (F,M,N,A,lda);
114
115#ifdef BENCH_PERF_LQUP_MP
116        chrono.stop();
117        //t_rec+=chrono.usertime();
118        std::cout<<"LUDIVINE RNS PERF:"<<std::endl;
119        std::cout<<"  ---  RNS basis size: "<<Zp.size() <<std::endl;
120        std::cout<<"  ***      init  : "<<t_init<<std::endl;
121        std::cout<<"  ***  rns  mod  : "<<t_mod<<std::endl;
122        std::cout<<"  ***  rns lqup  : "<<t_lqup<<" ( igemm="<<Zp.t_igemm<<" ftrsm="<<Zp.t_trsm<<" scal="<<Zp.t_scal
123        <<" modp="<<Zp.t_modp<<std::endl;
124        std::cout<<"  ***  rns  rec  : "<<t_rec<<std::endl;
125        std::cout<<"  ***       mod  : "<<chrono.usertime()<<std::endl;
126
127#endif
128        FFLAS::fflas_delete(Ap);
129        return R;
130        } else {
131            return 0;
132        }
133    }
134
135} // namespace FFPACK
136
137#endif
138/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
139// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
140