1/* 2 * Copyright (C) 2014 the FFLAS-FFPACK group 3 * 4 * Written by Pascal Giorgi <pascal.giorgi@lirmm.fr> 5 * 6 * 7 * ========LICENCE======== 8 * This file is part of the library FFLAS-FFPACK. 9 * 10 * FFLAS-FFPACK is free software: you can redistribute it and/or modify 11 * it under the terms of the GNU Lesser General Public 12 * License as published by the Free Software Foundation; either 13 * version 2.1 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with this library; if not, write to the Free Software 22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 23 * ========LICENCE======== 24 *. 25 */ 26 27#ifndef __FFPACK_pluq_mp_INL 28#define __FFPACK_pluq_mp_INL 29 30#ifdef BENCH_PERF_LQUP_MP 31#define BENCH_PERF_FGEMM_MP 32#endif 33#include "fflas-ffpack/field/rns-integer-mod.h" 34#include "fflas-ffpack/field/rns-integer.h" 35#include "fflas-ffpack/fflas-ffpack.h" 36 37#include "givaro/givinteger.h" 38#include "givaro/modular-integer.h" 39namespace FFPACK { 40 41 template <class Cut, class Param> 42 inline size_t 43 PLUQ (const Givaro::Modular<Givaro::Integer>& F, 44 const FFLAS::FFLAS_DIAG Diag, 45 const size_t M, const size_t N, 46 typename Givaro::Integer* A, const size_t lda, 47 size_t*P, size_t *Q, size_t BCThreshold, FFLAS::ParSeqHelper::Parallel<Cut,Param>& PSHelper) 48 { 49 50 const size_t K = std::max(M,N); 51 if (K) { 52#ifdef BENCH_PERF_LQUP_MP 53 double t_init=0, t_lqup=0, t_mod=0, t_rec=0; 54 FFLAS::Timer chrono; 55 chrono.start(); 56#endif 57 Givaro::Integer p; 58 F.cardinality(p); 59 size_t logp=p.bitsize(); 60 61 // compute bit size of feasible prime 62 size_t _k=std::max(K,logp/20), lk=0; 63 while ( _k ) {_k>>=1; ++lk;} 64 size_t prime_bitsize= (53-lk)>>1; 65 66 // construct rns basis 67 Givaro::Integer maxC= (p-1)*(p-1)*(p-1)*uint64_t(K); 68 uint64_t n_pr =uint64_t(ceil(double(maxC.bitsize())/double(prime_bitsize))); 69 maxC=(p-1)*(p-1)*uint64_t(K)*(1<<prime_bitsize)*n_pr; 70 71 FFPACK::rns_double RNS(maxC, prime_bitsize, true); 72 FFPACK::RNSIntegerMod<FFPACK::rns_double> Zp(p, RNS); 73#ifdef BENCH_PERF_LQUP_MP 74 chrono.stop(); 75 t_init+=chrono.usertime(); 76 chrono.clear();chrono.start(); 77#endif 78 // compute A in RNS 79 FFPACK::rns_double::Element_ptr Ap; 80 Ap = FFLAS::fflas_new(Zp,M,N); 81 FFLAS::finit_rns(Zp,M,N,(logp/16)+(logp%16?1:0),A,lda,Ap); 82 83 84#ifdef BENCH_PERF_LQUP_MP 85 chrono.stop(); 86 t_mod+=chrono.usertime(); 87 chrono.clear();chrono.start(); 88#endif 89 // call lqup in rns 90 size_t R=FFPACK::PLUQ(Zp, Diag, M, N, Ap, N, P, Q, BCThreshold, PSHelper); 91#ifdef BENCH_PERF_LQUP_MP 92 chrono.stop(); 93 t_lqup+=chrono.usertime(); 94 chrono.clear();chrono.start(); 95#endif 96 //Zp.write(std::cout,*Ap); 97 // reconstruct the result 98 FFLAS::fconvert_rns(Zp,M,N,F.zero,A,lda,Ap); 99#ifdef BENCH_PERF_LQUP_MP 100 chrono.stop(); 101 t_rec+=chrono.usertime(); 102 chrono.clear();chrono.start(); 103#endif 104 // reduce it modulo p 105 FFLAS::freduce (F,M,N,A,lda); 106 //F.write(std::cout,*A); 107 108#ifdef BENCH_PERF_LQUP_MP 109 chrono.stop(); 110 //t_rec+=chrono.usertime(); 111 std::cout<<"PLUQ RNS PERF:"<<std::endl; 112 std::cout<<" --- RNS basis size: "<<Zp.size() <<std::endl; 113 std::cout<<" *** init : "<<t_init<<std::endl; 114 std::cout<<" *** rns mod : "<<t_mod<<std::endl; 115 std::cout<<" *** rns lqup : "<<t_lqup<<" ( igemm="<<Zp.t_igemm<<" ftrsm="<<Zp.t_trsm<<" scal="<<Zp.t_scal 116 <<" modp="<<Zp.t_modp<<std::endl; 117 std::cout<<" *** rns rec : "<<t_rec<<std::endl; 118 std::cout<<" *** mod : "<<chrono.usertime()<<std::endl; 119 120#endif 121 FFLAS::fflas_delete(Ap); 122 return R; 123 } else { 124 return 0; 125 } 126 } 127 128} // namespace FFPACK 129 130#endif 131 132/* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 133// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s 134