1 /****************************************************************************/
2 /* This file is part of FreeFEM. */
3 /* */
4 /* FreeFEM is free software: you can redistribute it and/or modify */
5 /* it under the terms of the GNU Lesser General Public License as */
6 /* published by the Free Software Foundation, either version 3 of */
7 /* the License, or (at your option) any later version. */
8 /* */
9 /* FreeFEM is distributed in the hope that it will be useful, */
10 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
11 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
12 /* GNU Lesser General Public License for more details. */
13 /* */
14 /* You should have received a copy of the GNU Lesser General Public License */
15 /* along with FreeFEM. If not, see <http://www.gnu.org/licenses/>. */
16 /****************************************************************************/
17 // SUMMARY : ...
18 // LICENSE : LGPLv3
19 // ORG : LJLL Universite Pierre et Marie Curie, Paris, FRANCE
20 // AUTHORS : Frederic Hecht
21 // E-MAIL : frederic.hecht@sorbonne-unviersite.fr
22
23 // *INDENT-OFF* //
24 //ff-c++-LIBRARY-dep: mumps parmetis metis [ptscotch scotch] scalapack blas mpifc fc mpi pthread
25 //ff-c++-cpp-dep:
26 // *INDENT-ON* //
27
28 // F. Hecht december 2011
29 // ----------------------------
30 // file to add MUMPS sequentiel interface for sparce linear solver with dynamic load.
31 #include <mpi.h>
32 #ifdef _WIN32
33 __declspec(dllexport) int toto;
34 MPI_Fint* _imp__MPI_F_STATUS_IGNORE;
35 MPI_Fint* _imp__MPI_F_STATUSES_IGNORE;
36 //__declspec(dllexport) void __guard_check_icall_fptr(unsigned long ptr) { }
37 #endif
38 #include <iostream>
39 using namespace std;
40
41 #include "ff++.hpp"
42
43
44 #include <dmumps_c.h>
45 #include <zmumps_c.h>
46
47 const int JOB_INIT = -1;
48 const int JOB_END = -2;
49 const int JOB_ANA = 1;
50 const int JOB_FAC = 2;
51 const int JOB_ANA_FAC = 4;
52 const int JOB_SOLVE = 3;
53 const int USE_COMM_WORLD = -987654;
54
55 template<typename RR> struct MUMPS_STRUC_TRAIT {typedef void MUMPS; typedef void R; };
56 template<> struct MUMPS_STRUC_TRAIT<double> {typedef DMUMPS_STRUC_C MUMPS; typedef double R;};
57 template<> struct MUMPS_STRUC_TRAIT<Complex> {typedef ZMUMPS_STRUC_C MUMPS; typedef ZMUMPS_COMPLEX R;};
mumps_c(DMUMPS_STRUC_C * id)58 void mumps_c(DMUMPS_STRUC_C *id) { dmumps_c(id);}
mumps_c(ZMUMPS_STRUC_C * id)59 void mumps_c(ZMUMPS_STRUC_C *id) { zmumps_c(id);}
60
TYPEMPI_TYPE61 template<class T> struct MPI_TYPE {static MPI_Datatype TYPE(){return MPI_BYTE;}};;
TYPEMPI_TYPE62 template<> struct MPI_TYPE<long> {static MPI_Datatype TYPE(){return MPI_LONG;}};
TYPEMPI_TYPE63 template<> struct MPI_TYPE<int> {static MPI_Datatype TYPE(){return MPI_INT;}};
TYPEMPI_TYPE64 template<> struct MPI_TYPE<double> {static MPI_Datatype TYPE(){return MPI_DOUBLE;}};
TYPEMPI_TYPE65 template<> struct MPI_TYPE<char> {static MPI_Datatype TYPE(){return MPI_BYTE;}};
TYPEMPI_TYPE66 template<> struct MPI_TYPE<Complex> {static MPI_Datatype TYPE(){return MPI_DOUBLE_COMPLEX;}};
67
68
69 static std::string analysis[] = {"AMD", "", "AMF", "SCOTCH", "PORD", "METIS", "QAMD", "automatic sequential", "automatic parallel", "PT-SCOTCH", "ParMetis"};
70
71 //template<typename R>
72 template<class R=double>
73 class SolveMUMPS_mpi: public VirtualSolver<int,R>
74 {
75 public:
76 static const int orTypeSol;
77 typedef HashMatrix<int,R> HMat;
78 typedef R K; //
79 HMat &A;
80
81
82 // typedef double R;
83 long verb;
84 double eps;
85 double tgv;
86 int cn,cs;
87 typedef typename MUMPS_STRUC_TRAIT<R>::R MR;
88 mutable typename MUMPS_STRUC_TRAIT<R>::MUMPS id;
89 KN<double> *rinfog;
90 KN<long> *infog;
91 mutable unsigned char strategy;
92 bool distributed;
93 MPI_Comm comm;
94 int mpirank;
95 int matrank;
96 // int distributed;
97
ICNTL(int i) const98 int& ICNTL (int i) const {return id.icntl[i - 1];}
CNTL(int i) const99 double& CNTL (int i) const {return id.cntl[i - 1];}
INFO(int i) const100 int& INFO (int i) const {return id.info[i - 1];}
RINFO(int i) const101 double& RINFO (int i) const {return id.rinfo[i - 1];}
INFOG(int i) const102 int& INFOG (int i) const {return id.infog[i - 1];}
RINFOG(int i) const103 double& RINFOG (int i) const {return id.rinfog[i - 1];}
104
SetVerb() const105 void SetVerb () const {
106 ICNTL(1) = 6;// output stream for error messages.
107 ICNTL(2) = 6;// stream for diagnostic printing, statistics, and warning messages.
108 ICNTL(3) = 6;// output stream global information, collected on the host.
109 ICNTL(4) = min(max(verb-2,1L),4L); // the level of printing for error, warning, and diag
110 if(verb ==0 )ICNTL(4) =0;
111 ICNTL(11)=0; // noerroranalysisisperformed(nostatistics).
112 if( id.job ==JOB_SOLVE && verb >99)
113 { //computes statistics related to an error analysis of the linear system
114 if( verb > 999) ICNTL(11)=1; // All Stat (veryexpensive)
115 else ICNTL(11)=2;// compute main statistics
116 }
117
118
119 }
Clean()120 void Clean ()
121 {
122 delete [] id.irn;
123 delete [] id.jcn;
124 delete [] id.a;
125 id.irn=0;
126 id.jcn=0;
127 id.a =0;
128 }
to_mumps_mat()129 void to_mumps_mat()
130 {
131 Clean ();
132
133 id.nrhs = 0;//
134 int n = A.n;
135 int nz = A.nnz;
136 ffassert(A.n == A.m);
137 if( distributed || (mpirank == matrank) )
138 {
139 int *irn = new int[nz];
140 int *jcn = new int[nz];
141 R *a = new R[nz];
142 A.COO();
143
144 for (int k = 0; k < nz; ++k) {
145 {
146 irn[k] = A.i[k]+1;
147 jcn[k] = A.j[k] + 1;
148 a[k] = A.aij[k];
149 }
150 }
151
152 id.n = n;
153
154 if(!distributed)
155 {
156 if(mpirank == matrank)
157 {
158 id.nz = nz;
159 id.irn = irn;
160 id.jcn = jcn;
161 id.a = (MR *)(void *)a;
162 }
163 else
164 { // no matrix
165 id.nz=0;
166 id.a =0;
167 id.irn = 0;;
168 id.jcn = 0;
169
170 }
171
172 }
173 else
174 {
175 id.nz_loc = nz;
176 id.irn_loc = irn;
177 id.jcn_loc = jcn;
178 id.a_loc = (MR *)(void *)a;
179
180 }
181 id.rhs = 0;
182 ffassert( A.half == id.sym );//
183 ICNTL(5) = 0; // input matrix type
184 ICNTL(7) = 7; // NUMBERING ...
185
186 ICNTL(9) = 1; // 1: A x = b, !1 : tA x = b during slove phase
187 ICNTL(18) = 0;
188 if(strategy > 0 && strategy < 9 && strategy != 2)
189 {
190 ICNTL(28) = 1; // 1: sequential analysis
191 ICNTL(7) = strategy - 1; // 0: AMD
192 }
193 // 1:
194 // 2: AMF
195 // 3: SCOTCH
196 // 4: PORD
197 // 5: METIS
198 // 6: QAMD
199 // 7: automatic
200 else
201 {
202 ICNTL(28) = 1;
203 ICNTL(7) = 7;
204 }
205 if(strategy > 8 && strategy < 12)
206 {
207 ICNTL(28) = 2; // 2: parallel analysis
208 ICNTL(29) = strategy - 9; // 0: automatic
209 } // 1: PT-STOCH
210 // 2: ParMetis
211 ICNTL(9) = 1;
212 ICNTL(11) = 0; // verbose level
213 ICNTL(18) = distributed ? 3: 0; // centralized matrix input if !distributed
214 ICNTL(20) = 0; // dense RHS
215 ICNTL(14) = 30;
216 }
217 }
Check(const char * msg="mumps_mpi")218 void Check (const char *msg = "mumps_mpi")
219 {
220 if (INFO(1) != 0) {
221 cout << " Erreur Mumps mpi: number " << INFO(1) << endl;
222 cout << " Fatal Erreur " << msg << endl;
223 Clean ();
224 id.job = JOB_END;
225 mumps_c(&id); /* Terminate instance */
226 ErrorExec(msg, INFO(1));
227 }
228 }
CopyInfo()229 void CopyInfo()
230 {
231 if (rinfog) {
232 // copy rinfog
233 if (rinfog->N() < 40) {rinfog->resize(40);}
234
235 for (int i = 0; i < 40; ++i) {
236 (*rinfog)[i] = RINFOG(i + 1);
237 }
238 }
239
240 if (infog) {
241 // copy ginfo
242 if (infog->N() < 40) {infog->resize(40);}
243
244 for (int i = 0; i < 40; ++i) {
245 (*infog)[i] = INFOG(i + 1);
246 }
247 }
248 }
SolveMUMPS_mpi(HMat & AA,const Data_Sparse_Solver & ds,Stack stack)249 SolveMUMPS_mpi (HMat &AA, const Data_Sparse_Solver & ds,Stack stack )
250 : A(AA), verb(ds.verb),
251 eps(ds.epsilon),
252 tgv(ds.tgv),cn(0),cs(0),
253 rinfog(ds.rinfo), infog(ds.info),
254 matrank(ds.master),distributed(ds.master<0),
255 strategy(ds.strategy)
256 {
257
258 if(ds.commworld)
259 MPI_Comm_dup(*((MPI_Comm*)ds.commworld), &comm);
260 else
261 MPI_Comm_dup(MPI_COMM_WORLD, &comm);
262
263 MPI_Comm_rank(comm, &mpirank);
264 int master = mpirank==matrank;
265 int myid = 0;
266 MPI_Comm_rank(MPI_COMM_WORLD, &myid);
267
268 id.irn=0;
269 id.jcn=0;
270 id.a =0;
271
272 id.job = JOB_INIT;
273 id.par = 1;
274 id.sym = A.half;
275 id.comm_fortran = MPI_Comm_c2f(comm);
276 SetVerb();
277 mumps_c(&id);
278
279
280 Check("MUMPS_mpi build/init");
281 if (verbosity > 3 && master) {
282 cout << " -- MUMPS n= " << id.n << ", peak Mem: " << INFOG(22) << " Mb" << " sym: " << id.sym << endl;
283 }
284
285
286 }
287
288
289
~SolveMUMPS_mpi()290 ~SolveMUMPS_mpi () {
291 Clean ();
292 id.job = JOB_END;
293 SetVerb () ;
294 mumps_c(&id); /* Terminate instance */
295 /*int ierr = */
296 MPI_Comm_free(&comm);
297 }
298
299
dosolver(K * x,K * b,int N,int trans)300 void dosolver(K *x,K*b,int N,int trans)
301 {
302 size_t nN=id.n*N;
303 if (verbosity > 1 && mpirank==0) {
304 cout << " -- MUMPS solve, peak Mem : " << INFOG(22) << " Mb, n = "
305 << id.n << " sym =" << id.sym <<" trans = " << trans << endl;
306 }
307 ICNTL(9) = trans == 0; // 1: A x = b, !1 : tA x = b during slove phase
308 id.nrhs = N;
309 // x = b;
310 if(distributed)
311 {
312 MPI_Reduce( (void *) b,(void *) x , nN , MPI_TYPE<R>::TYPE(),MPI_SUM,0,comm);
313 }
314 else if(mpirank==0) std::copy(b,b+nN,x);
315 id.rhs = (MR *)(void *)(R *)x;
316 id.job = JOB_SOLVE; // performs the analysis. and performs the factorization.
317 SetVerb();
318 mumps_c(&id);
319 Check("MUMPS_mpi dosolver");
320 if(distributed) // send the solution ...
321 MPI_Bcast(reinterpret_cast<void*> (x),nN, MPI_TYPE<R>::TYPE(), 0,comm);
322
323
324 if (verb > 9 && mpirank==0) {
325
326 for(int j=0; j<N; ++j)
327 {
328 KN_<R> B(b+j*id.n,id.n);
329 cout << j <<" b linfty " << B.linfty() << endl;
330 }
331 }
332
333 if (verb > 2) {
334
335 for(int j=0; j<N; ++j)
336 { KN_<R> B(x+j*id.n,id.n);
337 cout << " x " << j <<" linfty " << B.linfty() << endl;
338 }
339 }
340 CopyInfo();
341
342 }
343
fac_init()344 void fac_init(){
345 to_mumps_mat();
346 } // n, nzz fixe
fac_symbolic()347 void fac_symbolic(){
348 id.job = JOB_ANA;
349 SetVerb ();
350 mumps_c(&id);
351 Check("MUMPS_mpi Analyse");
352 CopyInfo();
353 }
fac_numeric()354 void fac_numeric(){
355 id.job = JOB_FAC;
356 SetVerb () ;
357 mumps_c(&id);
358 Check("MUMPS_mpi Factorize");
359 CopyInfo();
360 }
UpdateState()361 void UpdateState(){
362 if( A.GetReDoNumerics() ) cn++;
363 if( A.GetReDoSymbolic() ) cs++;
364 this->ChangeCodeState(A.n,cs,cn);
365 }
366
367 };
368
369 // 1 unsym , 2 herm, 4 sym, 8 pos , 16 nopos, 32 seq, 64 ompi, 128 mpi
370 template<> const int SolveMUMPS_mpi<double>::orTypeSol = 1|2|4|8|16|32;
371 template<> const int SolveMUMPS_mpi<std::complex<double>>::orTypeSol = 1|4|8|16|32;
372
Load_Init()373 static void Load_Init()
374 {
375 addsolver<SolveMUMPS_mpi<double>>("MUMPS",50,1);
376 addsolver<SolveMUMPS_mpi<Complex>>("MUMPS",50,1);
377 addsolver<SolveMUMPS_mpi<double>>("MUMPSMPI",50,1);
378 addsolver<SolveMUMPS_mpi<Complex>>("MUMPSMPI",50,1);
379 setptrstring(def_solver,"MUMPSMPI");
380 }
381 LOADFUNC(Load_Init)
382