1/* 2 * Copyright (C) 2014 the FFLAS-FFPACK group 3 * 4 * Written by Pascal Giorgi <pascal.giorgi@lirmm.fr> 5 * 6 * 7 * ========LICENCE======== 8 * This file is part of the library FFLAS-FFPACK. 9 * 10 * FFLAS-FFPACK is free software: you can redistribute it and/or modify 11 * it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with this library; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 * ========LICENCE======== 24 *. 25 */ 26 27#ifndef __FFPACK_ludivine_mp_INL 28#define __FFPACK_ludivine_mp_INL 29 30#include <givaro/modular-integer.h> 31#include <givaro/givinteger.h> 32 33#ifdef BENCH_PERF_LQUP_MP 34#define BENCH_PERF_FGEMM_MP 35#endif 36 37#include "fflas-ffpack/field/rns-integer-mod.h" 38#include "fflas-ffpack/field/rns-integer.h" 39#include "fflas-ffpack/fflas/fflas.h" 40#include "fflas-ffpack/ffpack/ffpack_ludivine.inl" 41 42namespace FFPACK { 43 44 template <> 45 inline size_t 46 LUdivine (const Givaro::Modular<Givaro::Integer>& F, 47 const FFLAS::FFLAS_DIAG Diag, const FFLAS::FFLAS_TRANSPOSE trans, 48 const size_t M, const size_t N, 49 typename Givaro::Integer* A, const size_t lda, 50 size_t*P, size_t *Q, 51 const FFPACK::FFPACK_LU_TAG LuTag, 52 const size_t cutoff 53 ) 54 { 55 const size_t K = std::max(M,N); 56 if (K) { 57#ifdef BENCH_PERF_LQUP_MP 58 double t_init=0, t_lqup=0, t_mod=0, t_rec=0; 59 FFLAS::Timer chrono; 60 chrono.start(); 61#endif 62 Givaro::Integer p; 63 F.cardinality(p); 64 size_t logp=p.bitsize(); 65 66 // compute bit size of feasible prime 67 size_t _k=std::max(K+1,logp/20), lk=0; 68 while ( _k ) {_k>>=1; ++lk;} 69 size_t prime_bitsize= (53-lk)>>1; 70 71 // construct rns basis 72 Givaro::Integer maxC= (p-1)*(p-1)*(p-1)*uint64_t(K); 73 uint64_t n_pr =uint64_t(ceil(double(maxC.bitsize())/double(prime_bitsize))); 74 maxC=(p-1)*(p-1)*uint64_t(K)*(1<<prime_bitsize)*n_pr; 75 76 77 FFPACK::rns_double RNS(maxC, prime_bitsize, true); 78 FFPACK::RNSIntegerMod<FFPACK::rns_double> Zp(p, RNS); 79#ifdef BENCH_PERF_LQUP_MP 80 chrono.stop(); 81 t_init+=chrono.usertime(); 82 chrono.clear();chrono.start(); 83#endif 84 // compute A in RNS 85 FFPACK::rns_double::Element_ptr Ap; 86 Ap = FFLAS::fflas_new(Zp,M,N); 87 FFLAS::finit_rns(Zp,M,N,(logp/16)+(logp%16?1:0),A,lda,Ap); 88 89 90#ifdef BENCH_PERF_LQUP_MP 91 chrono.stop(); 92 t_mod+=chrono.usertime(); 93 chrono.clear();chrono.start(); 94#endif 95 96 // call lqup in rns 97 size_t R=FFPACK::LUdivine(Zp, Diag, trans, M, N, Ap, N, P, Q, LuTag, cutoff); 98 99 //std::cout<<"LUDivine RNS done"<<std::endl; 100#ifdef BENCH_PERF_LQUP_MP 101 chrono.stop(); 102 t_lqup+=chrono.usertime(); 103 chrono.clear();chrono.start(); 104#endif 105 // reconstruct the result 106 FFLAS::fconvert_rns(Zp,M,N,F.zero,A,lda,Ap); 107#ifdef BENCH_PERF_LQUP_MP 108 chrono.stop(); 109 t_rec+=chrono.usertime(); 110 chrono.clear();chrono.start(); 111#endif 112 // reduce it modulo p 113 FFLAS::freduce (F,M,N,A,lda); 114 115#ifdef BENCH_PERF_LQUP_MP 116 chrono.stop(); 117 //t_rec+=chrono.usertime(); 118 std::cout<<"LUDIVINE RNS PERF:"<<std::endl; 119 std::cout<<" --- RNS basis size: "<<Zp.size() <<std::endl; 120 std::cout<<" *** init : "<<t_init<<std::endl; 121 std::cout<<" *** rns mod : "<<t_mod<<std::endl; 122 std::cout<<" *** rns lqup : "<<t_lqup<<" ( igemm="<<Zp.t_igemm<<" ftrsm="<<Zp.t_trsm<<" scal="<<Zp.t_scal 123 <<" modp="<<Zp.t_modp<<std::endl; 124 std::cout<<" *** rns rec : "<<t_rec<<std::endl; 125 std::cout<<" *** mod : "<<chrono.usertime()<<std::endl; 126 127#endif 128 FFLAS::fflas_delete(Ap); 129 return R; 130 } else { 131 return 0; 132 } 133 } 134 135} // namespace FFPACK 136 137#endif 138/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 139// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s 140