1 /****************************************************************************/
2 /* This file is part of FreeFEM.                                            */
3 /*                                                                          */
4 /* FreeFEM is free software: you can redistribute it and/or modify          */
5 /* it under the terms of the GNU Lesser General Public License as           */
6 /* published by the Free Software Foundation, either version 3 of           */
7 /* the License, or (at your option) any later version.                      */
8 /*                                                                          */
9 /* FreeFEM is distributed in the hope that it will be useful,               */
10 /* but WITHOUT ANY WARRANTY; without even the implied warranty of           */
11 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
12 /* GNU Lesser General Public License for more details.                      */
13 /*                                                                          */
14 /* You should have received a copy of the GNU Lesser General Public License */
15 /* along with FreeFEM. If not, see <http://www.gnu.org/licenses/>.          */
16 /****************************************************************************/
17 // SUMMARY : ...
18 // LICENSE : LGPLv3
19 // ORG     : LJLL Universite Pierre et Marie Curie, Paris, FRANCE
20 // AUTHORS : Frederic Hecht
21 // E-MAIL  : frederic.hecht@sorbonne-unviersite.fr
22 
23 // *INDENT-OFF* //
24 //ff-c++-LIBRARY-dep:  mumps parmetis metis [ptscotch scotch]  scalapack blas  mpifc  fc mpi  pthread
25 //ff-c++-cpp-dep:
26 // *INDENT-ON* //
27 
28 // F. Hecht  december 2011
29 // ----------------------------
30 // file to add MUMPS sequentiel interface for sparce linear solver with dynamic load.
31 #include <mpi.h>
32 #ifdef _WIN32
33 __declspec(dllexport) int toto;
34 MPI_Fint* _imp__MPI_F_STATUS_IGNORE;
35 MPI_Fint* _imp__MPI_F_STATUSES_IGNORE;
36 //__declspec(dllexport) void __guard_check_icall_fptr(unsigned long ptr) { }
37 #endif
38 #include  <iostream>
39 using namespace std;
40 
41 #include "ff++.hpp"
42 
43 
44 #include <dmumps_c.h>
45 #include <zmumps_c.h>
46 
47 const int JOB_INIT = -1;
48 const int JOB_END = -2;
49 const int JOB_ANA = 1;
50 const int JOB_FAC = 2;
51 const int JOB_ANA_FAC = 4;
52 const int JOB_SOLVE = 3;
53 const int USE_COMM_WORLD = -987654;
54 
55 template<typename RR> struct MUMPS_STRUC_TRAIT {typedef void MUMPS;  typedef void R; };
56 template<> struct MUMPS_STRUC_TRAIT<double>  {typedef DMUMPS_STRUC_C MUMPS; typedef double R;};
57 template<> struct MUMPS_STRUC_TRAIT<Complex>  {typedef ZMUMPS_STRUC_C MUMPS; typedef ZMUMPS_COMPLEX R;};
mumps_c(DMUMPS_STRUC_C * id)58 void mumps_c(DMUMPS_STRUC_C *id) { dmumps_c(id);}
mumps_c(ZMUMPS_STRUC_C * id)59 void mumps_c(ZMUMPS_STRUC_C *id) { zmumps_c(id);}
60 
TYPEMPI_TYPE61 template<class T> struct MPI_TYPE {static MPI_Datatype  TYPE(){return MPI_BYTE;}};;
TYPEMPI_TYPE62 template<> struct MPI_TYPE<long>      {static MPI_Datatype  TYPE(){return MPI_LONG;}};
TYPEMPI_TYPE63 template<> struct MPI_TYPE<int>      {static MPI_Datatype TYPE(){return MPI_INT;}};
TYPEMPI_TYPE64 template<> struct MPI_TYPE<double>    {static MPI_Datatype TYPE(){return MPI_DOUBLE;}};
TYPEMPI_TYPE65 template<> struct MPI_TYPE<char>    {static MPI_Datatype TYPE(){return MPI_BYTE;}};
TYPEMPI_TYPE66 template<> struct MPI_TYPE<Complex>    {static MPI_Datatype TYPE(){return MPI_DOUBLE_COMPLEX;}};
67 
68 
69 static std::string analysis[] = {"AMD", "", "AMF", "SCOTCH", "PORD", "METIS", "QAMD", "automatic sequential", "automatic parallel", "PT-SCOTCH", "ParMetis"};
70 
71 //template<typename R>
72 template<class R=double>
73 class SolveMUMPS_mpi: public  VirtualSolver<int,R>
74 {
75 public:
76     static const int orTypeSol;
77     typedef HashMatrix<int,R>  HMat;
78     typedef R K; //
79     HMat &A;
80 
81 
82     // typedef double R;
83     long verb;
84     double eps;
85     double tgv;
86     int cn,cs;
87     typedef typename MUMPS_STRUC_TRAIT<R>::R MR;
88     mutable typename MUMPS_STRUC_TRAIT<R>::MUMPS id;
89     KN<double> *rinfog;
90     KN<long> *infog;
91     mutable unsigned char   strategy;
92     bool distributed;
93     MPI_Comm comm;
94     int mpirank;
95     int matrank;
96     // int distributed;
97 
ICNTL(int i) const98     int&    ICNTL (int i) const {return id.icntl[i - 1];}
CNTL(int i) const99     double& CNTL  (int i) const {return id.cntl[i - 1];}
INFO(int i) const100     int&    INFO  (int i) const {return id.info[i - 1];}
RINFO(int i) const101     double& RINFO (int i) const {return id.rinfo[i - 1];}
INFOG(int i) const102     int&    INFOG (int i) const {return id.infog[i - 1];}
RINFOG(int i) const103     double& RINFOG (int i) const {return id.rinfog[i - 1];}
104 
SetVerb() const105     void SetVerb () const {
106         ICNTL(1) = 6;//   output stream for error messages.
107         ICNTL(2) = 6;//  stream for diagnostic printing, statistics, and warning messages.
108         ICNTL(3) = 6;//  output stream global information, collected on the host.
109         ICNTL(4) = min(max(verb-2,1L),4L); // the level of printing for error, warning, and diag
110         if(verb ==0 )ICNTL(4) =0;
111         ICNTL(11)=0; // noerroranalysisisperformed(nostatistics).
112         if( id.job ==JOB_SOLVE && verb >99)
113         { //computes statistics related to an error analysis of the linear system
114             if( verb > 999) ICNTL(11)=1; // All Stat (veryexpensive)
115             else ICNTL(11)=2;// compute main statistics
116         }
117 
118 
119     }
Clean()120     void Clean ()
121     {
122         delete [] id.irn;
123         delete [] id.jcn;
124         delete [] id.a;
125         id.irn=0;
126         id.jcn=0;
127         id.a =0;
128     }
to_mumps_mat()129     void to_mumps_mat()
130     {
131         Clean ();
132 
133         id.nrhs = 0;//
134         int n = A.n;
135         int nz = A.nnz;
136         ffassert(A.n == A.m);
137         if( distributed || (mpirank == matrank) )
138         {
139             int *irn = new int[nz];
140             int *jcn = new int[nz];
141             R *a = new R[nz];
142             A.COO();
143 
144             for (int k = 0; k < nz; ++k) {
145                 {
146                     irn[k] = A.i[k]+1;
147                     jcn[k] = A.j[k] + 1;
148                     a[k] = A.aij[k];
149                 }
150             }
151 
152             id.n = n;
153 
154             if(!distributed)
155             {
156                 if(mpirank == matrank)
157                 {
158                   id.nz = nz;
159                   id.irn = irn;
160                   id.jcn = jcn;
161                   id.a = (MR *)(void *)a;
162                 }
163                 else
164                 { //  no matrix
165                     id.nz=0;
166                     id.a =0;
167                     id.irn = 0;;
168                     id.jcn = 0;
169 
170                 }
171 
172             }
173             else
174             {
175                 id.nz_loc = nz;
176                 id.irn_loc = irn;
177                 id.jcn_loc = jcn;
178                 id.a_loc = (MR *)(void *)a;
179 
180             }
181             id.rhs = 0;
182             ffassert( A.half == id.sym );//
183             ICNTL(5) = 0;    // input matrix type
184             ICNTL(7) = 7;    // NUMBERING ...
185 
186             ICNTL(9) = 1;    // 1: A x = b, !1 : tA x = b  during slove phase
187             ICNTL(18) = 0;
188             if(strategy > 0 && strategy < 9 && strategy != 2)
189             {
190                 ICNTL(28) = 1;             // 1: sequential analysis
191                 ICNTL(7)  = strategy - 1; //     0: AMD
192             }
193             //     1:
194             //     2: AMF
195             //     3: SCOTCH
196             //     4: PORD
197             //     5: METIS
198             //     6: QAMD
199             //     7: automatic
200             else
201             {
202                 ICNTL(28) = 1;
203                 ICNTL(7)  = 7;
204             }
205             if(strategy > 8 && strategy < 12)
206             {
207                 ICNTL(28) = 2;              // 2: parallel analysis
208                 ICNTL(29) = strategy - 9;  //     0: automatic
209             }                                   //     1: PT-STOCH
210             //     2: ParMetis
211             ICNTL(9)  = 1;
212             ICNTL(11) = 0;                 // verbose level
213             ICNTL(18) = distributed ? 3: 0;        // centralized matrix input if !distributed
214             ICNTL(20) = 0;                 // dense RHS
215             ICNTL(14) = 30;
216         }
217     }
Check(const char * msg="mumps_mpi")218     void Check (const char *msg = "mumps_mpi")
219     {
220         if (INFO(1) != 0) {
221             cout << " Erreur Mumps mpi: number " << INFO(1) << endl;
222             cout << " Fatal Erreur  " << msg << endl;
223             Clean ();
224             id.job = JOB_END;
225             mumps_c(&id);	/* Terminate instance */
226             ErrorExec(msg, INFO(1));
227         }
228     }
CopyInfo()229     void CopyInfo()
230     {
231         if (rinfog) {
232             // copy rinfog
233             if (rinfog->N() < 40) {rinfog->resize(40);}
234 
235             for (int i = 0; i < 40; ++i) {
236                 (*rinfog)[i] = RINFOG(i + 1);
237             }
238         }
239 
240         if (infog) {
241             // copy ginfo
242             if (infog->N() < 40) {infog->resize(40);}
243 
244             for (int i = 0; i < 40; ++i) {
245                 (*infog)[i] = INFOG(i + 1);
246             }
247         }
248     }
SolveMUMPS_mpi(HMat & AA,const Data_Sparse_Solver & ds,Stack stack)249     SolveMUMPS_mpi (HMat  &AA, const Data_Sparse_Solver & ds,Stack stack )
250     : A(AA), verb(ds.verb),
251     eps(ds.epsilon),
252     tgv(ds.tgv),cn(0),cs(0),
253     rinfog(ds.rinfo), infog(ds.info),
254     matrank(ds.master),distributed(ds.master<0),
255     strategy(ds.strategy)
256     {
257 
258         if(ds.commworld)
259             MPI_Comm_dup(*((MPI_Comm*)ds.commworld), &comm);
260 	else
261 	    MPI_Comm_dup(MPI_COMM_WORLD, &comm);
262 
263         MPI_Comm_rank(comm, &mpirank);
264         int master = mpirank==matrank;
265         int myid = 0;
266         MPI_Comm_rank(MPI_COMM_WORLD, &myid);
267 
268         id.irn=0;
269         id.jcn=0;
270         id.a =0;
271 
272         id.job = JOB_INIT;
273         id.par = 1;
274         id.sym = A.half;
275         id.comm_fortran = MPI_Comm_c2f(comm);
276         SetVerb();
277         mumps_c(&id);
278 
279 
280         Check("MUMPS_mpi build/init");
281         if (verbosity > 3 && master) {
282             cout << "  -- MUMPS   n=  " << id.n << ", peak Mem: " << INFOG(22) << " Mb" << " sym: " << id.sym << endl;
283         }
284 
285 
286     }
287 
288 
289 
~SolveMUMPS_mpi()290     ~SolveMUMPS_mpi () {
291         Clean ();
292         id.job = JOB_END;
293         SetVerb () ;
294         mumps_c(&id);	/* Terminate instance */
295         /*int ierr = */
296         MPI_Comm_free(&comm);
297     }
298 
299 
dosolver(K * x,K * b,int N,int trans)300     void dosolver(K *x,K*b,int N,int trans)
301     {
302         size_t  nN=id.n*N;
303         if (verbosity > 1 && mpirank==0) {
304             cout << " -- MUMPS solve,  peak Mem : " << INFOG(22) << " Mb,   n = "
305             << id.n << " sym =" << id.sym <<" trans = " << trans  << endl;
306         }
307         ICNTL(9) = trans == 0;    // 1: A x = b, !1 : tA x = b  during slove phase
308         id.nrhs = N;
309         // x = b;
310         if(distributed)
311         {
312             MPI_Reduce( (void *) b,(void *)  x  , nN , MPI_TYPE<R>::TYPE(),MPI_SUM,0,comm);
313         }
314         else if(mpirank==0)  std::copy(b,b+nN,x);
315         id.rhs = (MR *)(void *)(R *)x;
316         id.job = JOB_SOLVE;    // performs the analysis. and performs the factorization.
317         SetVerb();
318         mumps_c(&id);
319         Check("MUMPS_mpi dosolver");
320         if(distributed) // send the solution ...
321             MPI_Bcast(reinterpret_cast<void*> (x),nN, MPI_TYPE<R>::TYPE(), 0,comm);
322 
323 
324         if (verb  > 9 && mpirank==0) {
325 
326             for(int j=0; j<N; ++j)
327             {
328                 KN_<R> B(b+j*id.n,id.n);
329                 cout << j <<"   b linfty " << B.linfty()  << endl;
330             }
331         }
332 
333         if (verb > 2) {
334 
335             for(int j=0; j<N; ++j)
336             {   KN_<R> B(x+j*id.n,id.n);
337                 cout << "   x  " << j <<"  linfty " << B.linfty() << endl;
338             }
339         }
340         CopyInfo();
341 
342     }
343 
fac_init()344     void fac_init(){
345         to_mumps_mat();
346     }  // n, nzz fixe
fac_symbolic()347     void fac_symbolic(){
348         id.job = JOB_ANA;
349         SetVerb ();
350         mumps_c(&id);
351         Check("MUMPS_mpi Analyse");
352         CopyInfo();
353     }
fac_numeric()354     void fac_numeric(){
355         id.job = JOB_FAC;
356         SetVerb () ;
357         mumps_c(&id);
358         Check("MUMPS_mpi Factorize");
359         CopyInfo();
360     }
UpdateState()361     void UpdateState(){
362         if( A.GetReDoNumerics() ) cn++;
363         if( A.GetReDoSymbolic() ) cs++;
364         this->ChangeCodeState(A.n,cs,cn);
365     }
366 
367 };
368 
369 // 1 unsym , 2 herm, 4 sym, 8 pos , 16 nopos, 32  seq, 64  ompi, 128 mpi
370 template<> const int SolveMUMPS_mpi<double>::orTypeSol = 1|2|4|8|16|32;
371 template<> const int SolveMUMPS_mpi<std::complex<double>>::orTypeSol = 1|4|8|16|32;
372 
Load_Init()373 static void Load_Init()
374 {
375     addsolver<SolveMUMPS_mpi<double>>("MUMPS",50,1);
376     addsolver<SolveMUMPS_mpi<Complex>>("MUMPS",50,1);
377     addsolver<SolveMUMPS_mpi<double>>("MUMPSMPI",50,1);
378     addsolver<SolveMUMPS_mpi<Complex>>("MUMPSMPI",50,1);
379     setptrstring(def_solver,"MUMPSMPI");
380 }
381 LOADFUNC(Load_Init)
382