1 /*
2  * Copyright (C) 2016 FFLAS-FFPACK group.
3  *
4  * Written by Clément Pernet <clement.pernet@imag.fr>
5  *
6  *
7  * ========LICENCE========
8  * This file is part of the library FFLAS-FFPACK.
9  *
10  * FFLAS-FFPACK is free software: you can redistribute it and/or modify
11  * it under the terms of the  GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
23  * ========LICENCE========
24  *
25  */
26 
27 
28 
29 #include "fflas-ffpack/fflas-ffpack-config.h"
30 #include "fflas-ffpack/utils/fflas_randommatrix.h"
31 #include <iostream>
32 #include <givaro/modular-balanced.h>
33 #include "fflas-ffpack/utils/timer.h"
34 #include "fflas-ffpack/ffpack/ffpack.h"
35 
36 
37 #ifdef __GIVARO_USE_OPENMP
38 typedef Givaro::OMPTimer TTimer;
39 #else
40 typedef Givaro::Timer TTimer;
41 #endif
42 
43 #include <ctime>
44 #define CUBE(x) ((x)*(x)*(x))
45 #define GFOPS(m,n,r,t) (2.0/3.0*CUBE(double(n)/1000.0) +2*m/1000.0*n/1000.0*double(r)/1000.0  - double(r)/1000.0*double(r)/1000.0*(m+n)/1000)/t
46 
main()47 int main () {
48     using namespace std;
49 
50     typedef Givaro::ModularBalanced<double> Field;
51     Field F(131071);
52     typedef Field::Element Element ;
53     size_t n=128, nmax=1000, prec=64, nbest=0, count=0;
54     TTimer chrono,tim;
55     bool bound=false;
56 
57     Element * A = FFLAS::fflas_new (F, nmax, nmax);
58     Element * B = FFLAS::fflas_new (F, nmax, nmax);
59     size_t lda = nmax;
60     size_t r;
61     size_t * P = new size_t[nmax];
62     size_t * Q = new size_t[nmax];
63     FFPACK::RandomMatrix (F, nmax, nmax, A,nmax);
64     FFLAS::fassign (F, nmax,nmax, A, nmax, B, nmax);
65     time_t result = std::time(NULL);
66     cerr << std::endl
67     << "---------------------------------------------------------------------"
68     << std::endl << std::asctime(std::localtime(&result))
69     << std::endl
70     << "Threshold for PLUQ base case" ;
71     F.write(cerr << " (using ") << ')' << endl << endl;
72 
73     cerr << "PLUQ:  n                   Base case                        Recursive 1 level" << std::endl;
74     cerr << "                    seconds            Gfops          seconds            Gfops" << std::endl;
75     do {
76         double BCTime, RecTime;
77         int iter=10;
78 
79         //warm up computation
80         r=FFPACK::PLUQ_basecaseCrout (F, FFLAS::FflasNonUnit, n, n, A, lda, P, Q);
81         FFLAS::fassign (F, n, n, B, lda, A, lda);
82         chrono.clear();tim.clear();
83         for (int i=0;i<iter;i++){
84             chrono.start();
85             r=FFPACK::PLUQ_basecaseCrout (F, FFLAS::FflasNonUnit, n, n, A, lda, P, Q);
86             chrono.stop();
87             tim+=chrono;
88             FFLAS::fassign (F, n, n, B, lda, A, lda);
89         }
90         BCTime = tim.realtime()/iter;
91 
92         tim.clear();chrono.clear();
93         for (int i=0;i<iter;i++){
94             chrono.start();
95             r=FFPACK::_PLUQ (F, FFLAS::FflasNonUnit, n, n, A, lda, P, Q, n);
96             chrono.stop();
97             tim+=chrono;
98             FFLAS::fassign (F, n, n, B, lda, A, lda);
99         }
100         //FFLAS::fflas_delete(A);
101         //FFLAS::fflas_delete(B);
102         RecTime = tim.realtime()/iter;
103 
104         cerr << "      ";
105         cerr.width(4);
106         cerr << n;
107         cerr << "  ";
108         cerr.width(15);
109         cerr << BCTime;
110         cerr << "  ";
111         cerr.width(15);
112         cerr << GFOPS(n,n,r, BCTime) << "  ";
113         cerr.width(15);
114         cerr << RecTime;
115         cerr << "  ";
116         cerr.width(15);
117         cerr << GFOPS(n,n,r, RecTime) << endl;
118 
119         if (BCTime > RecTime){
120             count++;
121             if (count > 1){
122                 nbest = n;
123                 bound = true;
124                 prec = prec >> 1;
125                 n -= prec;
126             }
127         }
128         else{
129             count=0;
130             if (bound)
131                 prec=prec>>1;
132             n+=prec;
133         }
134     } while ((prec > 4 ) && (n < nmax));
135 
136     cerr<<endl;
137     if (nbest != 0 ) {
138         cout << "#ifndef __FFLASFFPACK_PLUQ_THRESHOLD"  << endl;
139         cout << "#define __FFLASFFPACK_PLUQ_THRESHOLD" << ' ' <<  nbest << endl;
140         cerr << "defined __FFLASFFPACK_PLUQ_THRESHOLD to " << nbest << "" << std::endl;
141         std::cout << "#endif" << endl  << endl;
142     }
143     FFLAS::fflas_delete(A);
144     FFLAS::fflas_delete(B);
145 
146     return 0;
147 }
148 /* -*- mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
149 // vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s
150