1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
54     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
58     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #endif
61 
62 #define MPIU_MUMPSINT             MPI_INT
63 #define PETSC_MUMPS_INT_MAX       2147483647
64 #define PETSC_MUMPS_INT_MIN       -2147483648
65 
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt * b)67 PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69   PetscFunctionBegin;
70   if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
71   *b = (PetscMUMPSInt)(a);
72   PetscFunctionReturn(0);
73 }
74 
75 /* Put these utility routines here since they are only used in this file */
PetscOptionsMUMPSInt_Private(PetscOptionItems * PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt * value,PetscBool * set,PetscMUMPSInt lb,PetscMUMPSInt ub)76 PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
77 {
78   PetscErrorCode ierr;
79   PetscInt       myval;
80   PetscBool      myset;
81   PetscFunctionBegin;
82   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83   ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
84   if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
85   if (set) *set = myset;
86   PetscFunctionReturn(0);
87 }
88 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
89 
90 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
91 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92 #define PetscMUMPS_c(mumps) \
93   do { \
94     if (mumps->use_petsc_omp_support) { \
95       if (mumps->is_omp_master) { \
96         ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
97         MUMPS_c(&mumps->id); \
98         ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
99       } \
100       ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
101       /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
102          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
103          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
104          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
105       */ \
106       ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr);  \
107       ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRQ(ierr); \
108       ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr);  \
109     } else { \
110       MUMPS_c(&mumps->id); \
111     } \
112   } while (0)
113 #else
114 #define PetscMUMPS_c(mumps) \
115   do { MUMPS_c(&mumps->id); } while (0)
116 #endif
117 
118 /* declare MumpsScalar */
119 #if defined(PETSC_USE_COMPLEX)
120 #if defined(PETSC_USE_REAL_SINGLE)
121 #define MumpsScalar mumps_complex
122 #else
123 #define MumpsScalar mumps_double_complex
124 #endif
125 #else
126 #define MumpsScalar PetscScalar
127 #endif
128 
129 /* macros s.t. indices match MUMPS documentation */
130 #define ICNTL(I) icntl[(I)-1]
131 #define CNTL(I) cntl[(I)-1]
132 #define INFOG(I) infog[(I)-1]
133 #define INFO(I) info[(I)-1]
134 #define RINFOG(I) rinfog[(I)-1]
135 #define RINFO(I) rinfo[(I)-1]
136 
137 typedef struct Mat_MUMPS Mat_MUMPS;
138 struct Mat_MUMPS {
139 #if defined(PETSC_USE_COMPLEX)
140 #if defined(PETSC_USE_REAL_SINGLE)
141   CMUMPS_STRUC_C id;
142 #else
143   ZMUMPS_STRUC_C id;
144 #endif
145 #else
146 #if defined(PETSC_USE_REAL_SINGLE)
147   SMUMPS_STRUC_C id;
148 #else
149   DMUMPS_STRUC_C id;
150 #endif
151 #endif
152 
153   MatStructure   matstruc;
154   PetscMPIInt    myid,petsc_size;
155   PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
156   PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
157   PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
158   PetscMUMPSInt  sym;
159   MPI_Comm       mumps_comm;
160   PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
161   VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
162   PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
163   PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
164 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
165   PetscInt       *rhs_nrow,max_nrhs;
166   PetscMPIInt    *rhs_recvcounts,*rhs_disps;
167   PetscScalar    *rhs_loc,*rhs_recvbuf;
168 #endif
169   Vec            b_seq,x_seq;
170   PetscInt       ninfo,*info;           /* which INFO to display */
171   PetscInt       sizeredrhs;
172   PetscScalar    *schur_sol;
173   PetscInt       schur_sizesol;
174   PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
175   PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
176   PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
177 
178   /* stuff used by petsc/mumps OpenMP support*/
179   PetscBool      use_petsc_omp_support;
180   PetscOmpCtrl   omp_ctrl;              /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
181   MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
182   PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
183   PetscMPIInt    tag,omp_comm_size;
184   PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
185   MPI_Request    *reqs;
186 };
187 
188 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
189    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
190  */
PetscMUMPSIntCSRCast(Mat_MUMPS * mumps,PetscInt nrow,PetscInt * ia,PetscInt * ja,PetscMUMPSInt ** ia_mumps,PetscMUMPSInt ** ja_mumps,PetscMUMPSInt * nnz_mumps)191 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
192 {
193   PetscErrorCode ierr;
194   PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_USE_64BIT_INDICES)
198   {
199     PetscInt i;
200     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201       ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
202       ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
203       mumps->cur_ilen = nrow+1;
204     }
205     if (nnz > mumps->cur_jlen) {
206       ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
207       ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
208       mumps->cur_jlen = nnz;
209     }
210     for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
211     for (i=0; i<nnz; i++)    {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
212     *ia_mumps = mumps->ia_alloc;
213     *ja_mumps = mumps->ja_alloc;
214   }
215 #else
216   *ia_mumps = ia;
217   *ja_mumps = ja;
218 #endif
219   ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
MatMumpsResetSchur_Private(Mat_MUMPS * mumps)223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224 {
225   PetscErrorCode ierr;
226 
227   PetscFunctionBegin;
228   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
229   ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
230   ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
231   mumps->id.size_schur = 0;
232   mumps->id.schur_lld  = 0;
233   mumps->id.ICNTL(19)  = 0;
234   PetscFunctionReturn(0);
235 }
236 
237 /* solve with rhs in mumps->id.redrhs and return in the same location */
MatMumpsSolveSchur_Private(Mat F)238 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
239 {
240   Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
241   Mat                  S,B,X;
242   MatFactorSchurStatus schurstatus;
243   PetscInt             sizesol;
244   PetscErrorCode       ierr;
245 
246   PetscFunctionBegin;
247   ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
248   ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
249   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
250   ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
251 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
252   ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
253 #endif
254   switch (schurstatus) {
255   case MAT_FACTOR_SCHUR_FACTORED:
256     ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
257     ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
258 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
259     ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
260 #endif
261     if (!mumps->id.ICNTL(9)) { /* transpose solve */
262       ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
263     } else {
264       ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
265     }
266     break;
267   case MAT_FACTOR_SCHUR_INVERTED:
268     sizesol = mumps->id.nrhs*mumps->id.size_schur;
269     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
270       ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
271       ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
272       mumps->schur_sizesol = sizesol;
273     }
274     ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
275     ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
276 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
277     ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
278 #endif
279     ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
280     if (!mumps->id.ICNTL(9)) { /* transpose solve */
281       ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
282     } else {
283       ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
284     }
285     ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
286     ierr = MatProductSymbolic(X);CHKERRQ(ierr);
287     ierr = MatProductNumeric(X);CHKERRQ(ierr);
288 
289     ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
290     break;
291   default:
292     SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
293     break;
294   }
295   ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
296   ierr = MatDestroy(&B);CHKERRQ(ierr);
297   ierr = MatDestroy(&X);CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
MatMumpsHandleSchur_Private(Mat F,PetscBool expansion)301 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
302 {
303   Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
304   PetscErrorCode ierr;
305 
306   PetscFunctionBegin;
307   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
308     PetscFunctionReturn(0);
309   }
310   if (!expansion) { /* prepare for the condensation step */
311     PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
312     /* allocate MUMPS internal array to store reduced right-hand sides */
313     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
314       ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
315       mumps->id.lredrhs = mumps->id.size_schur;
316       ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
317       mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
318     }
319     mumps->id.ICNTL(26) = 1; /* condensation phase */
320   } else { /* prepare for the expansion step */
321     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
322     ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
323     mumps->id.ICNTL(26) = 2; /* expansion phase */
324     PetscMUMPS_c(mumps);
325     if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
326     /* restore defaults */
327     mumps->id.ICNTL(26) = -1;
328     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
329     if (mumps->id.nrhs > 1) {
330       ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
331       mumps->id.lredrhs = 0;
332       mumps->sizeredrhs = 0;
333     }
334   }
335   PetscFunctionReturn(0);
336 }
337 
338 /*
339   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
340 
341   input:
342     A       - matrix in aij,baij or sbaij format
343     shift   - 0: C style output triple; 1: Fortran style output triple.
344     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
345               MAT_REUSE_MATRIX:   only the values in v array are updated
346   output:
347     nnz     - dim of r, c, and v (number of local nonzero entries of A)
348     r, c, v - row and col index, matrix values (matrix triples)
349 
350   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
351   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
352   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
353 
354  */
355 
MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)356 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
357 {
358   const PetscScalar *av;
359   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
360   PetscInt64        nz,rnz,i,j,k;
361   PetscErrorCode    ierr;
362   PetscMUMPSInt     *row,*col;
363   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
364 
365   PetscFunctionBegin;
366   ierr       = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
367   mumps->val = (PetscScalar*)av;
368   if (reuse == MAT_INITIAL_MATRIX) {
369     nz   = aa->nz;
370     ai   = aa->i;
371     aj   = aa->j;
372     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
373     for (i=k=0; i<M; i++) {
374       rnz = ai[i+1] - ai[i];
375       ajj = aj + ai[i];
376       for (j=0; j<rnz; j++) {
377         ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
378         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
379         k++;
380       }
381     }
382     mumps->irn = row;
383     mumps->jcn = col;
384     mumps->nnz = nz;
385   }
386   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
387   PetscFunctionReturn(0);
388 }
389 
MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)390 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
391 {
392   PetscErrorCode ierr;
393   PetscInt64     nz,i,j,k,r;
394   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
395   PetscMUMPSInt  *row,*col;
396 
397   PetscFunctionBegin;
398   mumps->val = a->val;
399   if (reuse == MAT_INITIAL_MATRIX) {
400     nz   = a->sliidx[a->totalslices];
401     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
402     for (i=k=0; i<a->totalslices; i++) {
403       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
404         ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
405       }
406     }
407     for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
408     mumps->irn = row;
409     mumps->jcn = col;
410     mumps->nnz = nz;
411   }
412   PetscFunctionReturn(0);
413 }
414 
MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)415 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
416 {
417   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
418   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
419   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
420   PetscInt       bs;
421   PetscErrorCode ierr;
422   PetscMUMPSInt  *row,*col;
423 
424   PetscFunctionBegin;
425   ierr       = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
426   M          = A->rmap->N/bs;
427   mumps->val = aa->a;
428   if (reuse == MAT_INITIAL_MATRIX) {
429     ai   = aa->i; aj = aa->j;
430     nz   = bs2*aa->nz;
431     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
432     for (i=0; i<M; i++) {
433       ajj = aj + ai[i];
434       rnz = ai[i+1] - ai[i];
435       for (k=0; k<rnz; k++) {
436         for (j=0; j<bs; j++) {
437           for (m=0; m<bs; m++) {
438             ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
439             ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
440             idx++;
441           }
442         }
443       }
444     }
445     mumps->irn = row;
446     mumps->jcn = col;
447     mumps->nnz = nz;
448   }
449   PetscFunctionReturn(0);
450 }
451 
MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)452 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
453 {
454   const PetscInt *ai, *aj,*ajj;
455   PetscInt        bs;
456   PetscInt64      nz,rnz,i,j,k,m;
457   PetscErrorCode  ierr;
458   PetscMUMPSInt   *row,*col;
459   PetscScalar     *val;
460   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
461   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
462 #if defined(PETSC_USE_COMPLEX)
463   PetscBool       hermitian;
464 #endif
465 
466   PetscFunctionBegin;
467 #if defined(PETSC_USE_COMPLEX)
468   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
469   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
470 #endif
471   ai   = aa->i;
472   aj   = aa->j;
473   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
474   if (reuse == MAT_INITIAL_MATRIX) {
475     nz   = aa->nz;
476     ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
477     if (bs>1) {
478       ierr       = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
479       mumps->val = mumps->val_alloc;
480     } else {
481       mumps->val = aa->a;
482     }
483     mumps->irn = row;
484     mumps->jcn = col;
485   } else {
486     if (bs == 1) mumps->val = aa->a;
487     row = mumps->irn;
488     col = mumps->jcn;
489   }
490   val = mumps->val;
491 
492   nz = 0;
493   if (bs>1) {
494     for (i=0; i<mbs; i++) {
495       rnz = ai[i+1] - ai[i];
496       ajj = aj + ai[i];
497       for (j=0; j<rnz; j++) {
498         for (k=0; k<bs; k++) {
499           for (m=0; m<bs; m++) {
500             if (ajj[j]>i || k>=m) {
501               if (reuse == MAT_INITIAL_MATRIX) {
502                 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
503                 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
504               }
505               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
506             }
507           }
508         }
509       }
510     }
511   } else if (reuse == MAT_INITIAL_MATRIX) {
512     for (i=0; i<mbs; i++) {
513       rnz = ai[i+1] - ai[i];
514       ajj = aj + ai[i];
515       for (j=0; j<rnz; j++) {
516         ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
517         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
518         nz++;
519       }
520     }
521     if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
522   }
523   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
524   PetscFunctionReturn(0);
525 }
526 
MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)527 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
528 {
529   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
530   PetscInt64        nz,rnz,i,j;
531   const PetscScalar *av,*v1;
532   PetscScalar       *val;
533   PetscErrorCode    ierr;
534   PetscMUMPSInt     *row,*col;
535   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
536   PetscBool         missing;
537 #if defined(PETSC_USE_COMPLEX)
538   PetscBool         hermitian;
539 #endif
540 
541   PetscFunctionBegin;
542 #if defined(PETSC_USE_COMPLEX)
543   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
544   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
545 #endif
546   ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
547   ai    = aa->i; aj = aa->j;
548   adiag = aa->diag;
549   ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
550   if (reuse == MAT_INITIAL_MATRIX) {
551     /* count nz in the upper triangular part of A */
552     nz = 0;
553     if (missing) {
554       for (i=0; i<M; i++) {
555         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
556           for (j=ai[i];j<ai[i+1];j++) {
557             if (aj[j] < i) continue;
558             nz++;
559           }
560         } else {
561           nz += ai[i+1] - adiag[i];
562         }
563       }
564     } else {
565       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
566     }
567     ierr       = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
568     ierr       = PetscMalloc1(nz,&val);CHKERRQ(ierr);
569     mumps->nnz = nz;
570     mumps->irn = row;
571     mumps->jcn = col;
572     mumps->val = mumps->val_alloc = val;
573 
574     nz = 0;
575     if (missing) {
576       for (i=0; i<M; i++) {
577         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
578           for (j=ai[i];j<ai[i+1];j++) {
579             if (aj[j] < i) continue;
580             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
581             ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
582             val[nz] = av[j];
583             nz++;
584           }
585         } else {
586           rnz = ai[i+1] - adiag[i];
587           ajj = aj + adiag[i];
588           v1  = av + adiag[i];
589           for (j=0; j<rnz; j++) {
590             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
591             ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
592             val[nz++] = v1[j];
593           }
594         }
595       }
596     } else {
597       for (i=0; i<M; i++) {
598         rnz = ai[i+1] - adiag[i];
599         ajj = aj + adiag[i];
600         v1  = av + adiag[i];
601         for (j=0; j<rnz; j++) {
602           ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
603           ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
604           val[nz++] = v1[j];
605         }
606       }
607     }
608   } else {
609     nz = 0;
610     val = mumps->val;
611     if (missing) {
612       for (i=0; i <M; i++) {
613         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
614           for (j=ai[i];j<ai[i+1];j++) {
615             if (aj[j] < i) continue;
616             val[nz++] = av[j];
617           }
618         } else {
619           rnz = ai[i+1] - adiag[i];
620           v1  = av + adiag[i];
621           for (j=0; j<rnz; j++) {
622             val[nz++] = v1[j];
623           }
624         }
625       }
626     } else {
627       for (i=0; i <M; i++) {
628         rnz = ai[i+1] - adiag[i];
629         v1  = av + adiag[i];
630         for (j=0; j<rnz; j++) {
631           val[nz++] = v1[j];
632         }
633       }
634     }
635   }
636   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
637   PetscFunctionReturn(0);
638 }
639 
MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)640 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
641 {
642   PetscErrorCode    ierr;
643   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
644   PetscInt          bs;
645   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
646   PetscMUMPSInt     *row,*col;
647   const PetscScalar *av,*bv,*v1,*v2;
648   PetscScalar       *val;
649   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
650   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
651   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
652   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
653 #if defined(PETSC_USE_COMPLEX)
654   PetscBool         hermitian;
655 #endif
656 
657   PetscFunctionBegin;
658 #if defined(PETSC_USE_COMPLEX)
659   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
660   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
661 #endif
662   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
663   rstart = A->rmap->rstart;
664   ai = aa->i;
665   aj = aa->j;
666   bi = bb->i;
667   bj = bb->j;
668   av = aa->a;
669   bv = bb->a;
670 
671   garray = mat->garray;
672 
673   if (reuse == MAT_INITIAL_MATRIX) {
674     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
675     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
676     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
677     /* can not decide the exact mumps->nnz now because of the SBAIJ */
678     mumps->irn = row;
679     mumps->jcn = col;
680     mumps->val = mumps->val_alloc = val;
681   } else {
682     val = mumps->val;
683   }
684 
685   jj = 0; irow = rstart;
686   for (i=0; i<mbs; i++) {
687     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
688     countA = ai[i+1] - ai[i];
689     countB = bi[i+1] - bi[i];
690     bjj    = bj + bi[i];
691     v1     = av + ai[i]*bs2;
692     v2     = bv + bi[i]*bs2;
693 
694     if (bs>1) {
695       /* A-part */
696       for (j=0; j<countA; j++) {
697         for (k=0; k<bs; k++) {
698           for (m=0; m<bs; m++) {
699             if (rstart + ajj[j]*bs>irow || k>=m) {
700               if (reuse == MAT_INITIAL_MATRIX) {
701                 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
702                 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
703               }
704               val[jj++] = v1[j*bs2 + m + k*bs];
705             }
706           }
707         }
708       }
709 
710       /* B-part */
711       for (j=0; j < countB; j++) {
712         for (k=0; k<bs; k++) {
713           for (m=0; m<bs; m++) {
714             if (reuse == MAT_INITIAL_MATRIX) {
715               ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
716               ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
717             }
718             val[jj++] = v2[j*bs2 + m + k*bs];
719           }
720         }
721       }
722     } else {
723       /* A-part */
724       for (j=0; j<countA; j++) {
725         if (reuse == MAT_INITIAL_MATRIX) {
726           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
727           ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
728         }
729         val[jj++] = v1[j];
730       }
731 
732       /* B-part */
733       for (j=0; j < countB; j++) {
734         if (reuse == MAT_INITIAL_MATRIX) {
735           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
736           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
737         }
738         val[jj++] = v2[j];
739       }
740     }
741     irow+=bs;
742   }
743   mumps->nnz = jj;
744   PetscFunctionReturn(0);
745 }
746 
MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)747 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
748 {
749   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
750   PetscErrorCode    ierr;
751   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
752   PetscMUMPSInt     *row,*col;
753   const PetscScalar *av, *bv,*v1,*v2;
754   PetscScalar       *val;
755   Mat               Ad,Ao;
756   Mat_SeqAIJ        *aa;
757   Mat_SeqAIJ        *bb;
758 
759   PetscFunctionBegin;
760   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
761   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
762   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
763 
764   aa = (Mat_SeqAIJ*)(Ad)->data;
765   bb = (Mat_SeqAIJ*)(Ao)->data;
766   ai = aa->i;
767   aj = aa->j;
768   bi = bb->i;
769   bj = bb->j;
770 
771   rstart = A->rmap->rstart;
772 
773   if (reuse == MAT_INITIAL_MATRIX) {
774     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
775     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
776     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
777     mumps->nnz = nz;
778     mumps->irn = row;
779     mumps->jcn = col;
780     mumps->val = mumps->val_alloc = val;
781   } else {
782     val = mumps->val;
783   }
784 
785   jj = 0; irow = rstart;
786   for (i=0; i<m; i++) {
787     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
788     countA = ai[i+1] - ai[i];
789     countB = bi[i+1] - bi[i];
790     bjj    = bj + bi[i];
791     v1     = av + ai[i];
792     v2     = bv + bi[i];
793 
794     /* A-part */
795     for (j=0; j<countA; j++) {
796       if (reuse == MAT_INITIAL_MATRIX) {
797         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
798         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
799       }
800       val[jj++] = v1[j];
801     }
802 
803     /* B-part */
804     for (j=0; j < countB; j++) {
805       if (reuse == MAT_INITIAL_MATRIX) {
806         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
807         ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
808       }
809       val[jj++] = v2[j];
810     }
811     irow++;
812   }
813   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
814   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
815   PetscFunctionReturn(0);
816 }
817 
MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)818 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
819 {
820   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
821   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
822   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
823   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
824   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
825   const PetscInt    bs2=mat->bs2;
826   PetscErrorCode    ierr;
827   PetscInt          bs;
828   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
829   PetscMUMPSInt     *row,*col;
830   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
831   PetscScalar       *val;
832 
833   PetscFunctionBegin;
834   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
835   if (reuse == MAT_INITIAL_MATRIX) {
836     nz   = bs2*(aa->nz + bb->nz);
837     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
838     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
839     mumps->nnz = nz;
840     mumps->irn = row;
841     mumps->jcn = col;
842     mumps->val = mumps->val_alloc = val;
843   } else {
844     val = mumps->val;
845   }
846 
847   jj = 0; irow = rstart;
848   for (i=0; i<mbs; i++) {
849     countA = ai[i+1] - ai[i];
850     countB = bi[i+1] - bi[i];
851     ajj    = aj + ai[i];
852     bjj    = bj + bi[i];
853     v1     = av + bs2*ai[i];
854     v2     = bv + bs2*bi[i];
855 
856     idx = 0;
857     /* A-part */
858     for (k=0; k<countA; k++) {
859       for (j=0; j<bs; j++) {
860         for (n=0; n<bs; n++) {
861           if (reuse == MAT_INITIAL_MATRIX) {
862             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
863             ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
864           }
865           val[jj++] = v1[idx++];
866         }
867       }
868     }
869 
870     idx = 0;
871     /* B-part */
872     for (k=0; k<countB; k++) {
873       for (j=0; j<bs; j++) {
874         for (n=0; n<bs; n++) {
875           if (reuse == MAT_INITIAL_MATRIX) {
876             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
877             ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
878           }
879           val[jj++] = v2[idx++];
880         }
881       }
882     }
883     irow += bs;
884   }
885   PetscFunctionReturn(0);
886 }
887 
MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)888 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
889 {
890   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
891   PetscErrorCode    ierr;
892   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
893   PetscMUMPSInt     *row,*col;
894   const PetscScalar *av, *bv,*v1,*v2;
895   PetscScalar       *val;
896   Mat               Ad,Ao;
897   Mat_SeqAIJ        *aa;
898   Mat_SeqAIJ        *bb;
899 #if defined(PETSC_USE_COMPLEX)
900   PetscBool         hermitian;
901 #endif
902 
903   PetscFunctionBegin;
904 #if defined(PETSC_USE_COMPLEX)
905   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
906   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
907 #endif
908   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
909   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
910   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
911 
912   aa    = (Mat_SeqAIJ*)(Ad)->data;
913   bb    = (Mat_SeqAIJ*)(Ao)->data;
914   ai    = aa->i;
915   aj    = aa->j;
916   adiag = aa->diag;
917   bi    = bb->i;
918   bj    = bb->j;
919 
920   rstart = A->rmap->rstart;
921 
922   if (reuse == MAT_INITIAL_MATRIX) {
923     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
924     nzb = 0;    /* num of upper triangular entries in mat->B */
925     for (i=0; i<m; i++) {
926       nza   += (ai[i+1] - adiag[i]);
927       countB = bi[i+1] - bi[i];
928       bjj    = bj + bi[i];
929       for (j=0; j<countB; j++) {
930         if (garray[bjj[j]] > rstart) nzb++;
931       }
932     }
933 
934     nz   = nza + nzb; /* total nz of upper triangular part of mat */
935     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
936     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
937     mumps->nnz = nz;
938     mumps->irn = row;
939     mumps->jcn = col;
940     mumps->val = mumps->val_alloc = val;
941   } else {
942     val = mumps->val;
943   }
944 
945   jj = 0; irow = rstart;
946   for (i=0; i<m; i++) {
947     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
948     v1     = av + adiag[i];
949     countA = ai[i+1] - adiag[i];
950     countB = bi[i+1] - bi[i];
951     bjj    = bj + bi[i];
952     v2     = bv + bi[i];
953 
954     /* A-part */
955     for (j=0; j<countA; j++) {
956       if (reuse == MAT_INITIAL_MATRIX) {
957         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
958         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
959       }
960       val[jj++] = v1[j];
961     }
962 
963     /* B-part */
964     for (j=0; j < countB; j++) {
965       if (garray[bjj[j]] > rstart) {
966         if (reuse == MAT_INITIAL_MATRIX) {
967           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
968           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
969         }
970         val[jj++] = v2[j];
971       }
972     }
973     irow++;
974   }
975   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
976   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
MatDestroy_MUMPS(Mat A)980 PetscErrorCode MatDestroy_MUMPS(Mat A)
981 {
982   PetscErrorCode ierr;
983   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
984 
985   PetscFunctionBegin;
986   ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
987   ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
988   ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
989   ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
990   ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
991   ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
992   ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
993   ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
994   ierr = PetscFree(mumps->info);CHKERRQ(ierr);
995   ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
996   mumps->id.job = JOB_END;
997   PetscMUMPS_c(mumps);
998   if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
999 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1000   if (mumps->use_petsc_omp_support) {
1001     ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr);
1002     ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
1003     ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr);
1004   }
1005 #endif
1006   ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
1007   ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
1008   ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
1009   ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
1010   ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr);
1011   if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&mumps->mumps_comm);CHKERRQ(ierr);}
1012   ierr = PetscFree(A->data);CHKERRQ(ierr);
1013 
1014   /* clear composed functions */
1015   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1016   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
1017   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
1018   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
1019   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
1020   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
1021   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
1022   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
1023   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
1024   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
1025   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
1026   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
1027   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar * array)1032 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1033 {
1034   PetscErrorCode     ierr;
1035   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1036   const PetscMPIInt  ompsize=mumps->omp_comm_size;
1037   PetscInt           i,m,M,rstart;
1038 
1039   PetscFunctionBegin;
1040   ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
1041   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
1042   if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1043   if (ompsize == 1) {
1044     if (!mumps->irhs_loc) {
1045       mumps->nloc_rhs = m;
1046       ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
1047       ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1048       for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1049     }
1050     mumps->id.rhs_loc = (MumpsScalar*)array;
1051   } else {
1052   #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1053     const PetscInt  *ranges;
1054     PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
1055     MPI_Group       petsc_group,omp_group;
1056     PetscScalar     *recvbuf=NULL;
1057 
1058     if (mumps->is_omp_master) {
1059       /* Lazily initialize the omp stuff for distributed rhs */
1060       if (!mumps->irhs_loc) {
1061         ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
1062         ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
1063         ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRQ(ierr);
1064         ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRQ(ierr);
1065         for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1066         ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRQ(ierr);
1067 
1068         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1069         mumps->nloc_rhs = 0;
1070         ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
1071         for (j=0; j<ompsize; j++) {
1072           mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1073           mumps->nloc_rhs   += mumps->rhs_nrow[j];
1074         }
1075         ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
1076         for (j=k=0; j<ompsize; j++) {
1077           for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1078         }
1079 
1080         ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
1081         ierr = MPI_Group_free(&petsc_group);CHKERRQ(ierr);
1082         ierr = MPI_Group_free(&omp_group);CHKERRQ(ierr);
1083       }
1084 
1085       /* Realloc buffers when current nrhs is bigger than what we have met */
1086       if (nrhs > mumps->max_nrhs) {
1087         ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
1088         ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
1089         mumps->max_nrhs = nrhs;
1090       }
1091 
1092       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1093       for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
1094       mumps->rhs_disps[0] = 0;
1095       for (j=1; j<ompsize; j++) {
1096         mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1097         if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1098       }
1099       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1100     }
1101 
1102     ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
1103     ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRQ(ierr);
1104 
1105     if (mumps->is_omp_master) {
1106       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1107         PetscScalar *dst,*dstbase = mumps->rhs_loc;
1108         for (j=0; j<ompsize; j++) {
1109           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1110           dst = dstbase;
1111           for (i=0; i<nrhs; i++) {
1112             ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
1113             src += mumps->rhs_nrow[j];
1114             dst += mumps->nloc_rhs;
1115           }
1116           dstbase += mumps->rhs_nrow[j];
1117         }
1118       }
1119       mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1120     }
1121   #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1122   }
1123   mumps->id.nrhs     = nrhs;
1124   mumps->id.nloc_rhs = mumps->nloc_rhs;
1125   mumps->id.lrhs_loc = mumps->nloc_rhs;
1126   mumps->id.irhs_loc = mumps->irhs_loc;
1127   PetscFunctionReturn(0);
1128 }
1129 
MatSolve_MUMPS(Mat A,Vec b,Vec x)1130 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1131 {
1132   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1133   const PetscScalar  *rarray = NULL;
1134   PetscScalar        *array;
1135   IS                 is_iden,is_petsc;
1136   PetscErrorCode     ierr;
1137   PetscInt           i;
1138   PetscBool          second_solve = PETSC_FALSE;
1139   static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1140 
1141   PetscFunctionBegin;
1142   ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
1143   ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);
1144 
1145   if (A->factorerrortype) {
1146     ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1147     ierr = VecSetInf(x);CHKERRQ(ierr);
1148     PetscFunctionReturn(0);
1149   }
1150 
1151   mumps->id.nrhs = 1;
1152   if (mumps->petsc_size > 1) {
1153     if (mumps->ICNTL20 == 10) {
1154       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1155       ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
1156       ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
1157     } else {
1158       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a seqential rhs vector*/
1159       ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160       ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1161       if (!mumps->myid) {
1162         ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
1163         mumps->id.rhs = (MumpsScalar*)array;
1164       }
1165     }
1166   } else {  /* petsc_size == 1 */
1167     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1168     ierr = VecCopy(b,x);CHKERRQ(ierr);
1169     ierr = VecGetArray(x,&array);CHKERRQ(ierr);
1170     mumps->id.rhs = (MumpsScalar*)array;
1171   }
1172 
1173   /*
1174      handle condensation step of Schur complement (if any)
1175      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1176      According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1177      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1178      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1179   */
1180   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1181     if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1182     second_solve = PETSC_TRUE;
1183     ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
1184   }
1185   /* solve phase */
1186   /*-------------*/
1187   mumps->id.job = JOB_SOLVE;
1188   PetscMUMPS_c(mumps);
1189   if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1190 
1191   /* handle expansion step of Schur complement (if any) */
1192   if (second_solve) {
1193     ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
1194   }
1195 
1196   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1197     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1198       /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
1199       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1200     }
1201     if (!mumps->scat_sol) { /* create scatter scat_sol */
1202       PetscInt *isol2_loc=NULL;
1203       ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
1204       ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
1205       for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1206       ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr);  /* to */
1207       ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
1208       ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
1209       ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
1210       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1211     }
1212 
1213     ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214     ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1215   }
1216 
1217   if (mumps->petsc_size > 1) {
1218     if (mumps->ICNTL20 == 10) {
1219       ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
1220     } else if (!mumps->myid) {
1221       ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
1222     }
1223   } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}
1224 
1225   ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)1229 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1230 {
1231   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   mumps->id.ICNTL(9) = 0;
1236   ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
1237   mumps->id.ICNTL(9) = 1;
1238   PetscFunctionReturn(0);
1239 }
1240 
MatMatSolve_MUMPS(Mat A,Mat B,Mat X)1241 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1242 {
1243   PetscErrorCode    ierr;
1244   Mat               Bt = NULL;
1245   PetscBool         denseX,denseB,flg,flgT;
1246   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1247   PetscInt          i,nrhs,M;
1248   PetscScalar       *array;
1249   const PetscScalar *rbray;
1250   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1251   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1252   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1253   IS                is_to,is_from;
1254   PetscInt          k,proc,j,m,myrstart;
1255   const PetscInt    *rstart;
1256   Vec               v_mpi,msol_loc;
1257   VecScatter        scat_sol;
1258   Vec               b_seq;
1259   VecScatter        scat_rhs;
1260   PetscScalar       *aa;
1261   PetscInt          spnr,*ia,*ja;
1262   Mat_MPIAIJ        *b = NULL;
1263 
1264   PetscFunctionBegin;
1265   ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
1266   if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1267 
1268   ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
1269   if (denseB) {
1270     if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1271     mumps->id.ICNTL(20)= 0; /* dense RHS */
1272   } else { /* sparse B */
1273     if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1274     ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
1275     if (flgT) { /* input B is transpose of actural RHS matrix,
1276                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1277       ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
1278     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1279     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1280   }
1281 
1282   ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
1283   mumps->id.nrhs = nrhs;
1284   mumps->id.lrhs = M;
1285   mumps->id.rhs  = NULL;
1286 
1287   if (mumps->petsc_size == 1) {
1288     PetscScalar *aa;
1289     PetscInt    spnr,*ia,*ja;
1290     PetscBool   second_solve = PETSC_FALSE;
1291 
1292     ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
1293     mumps->id.rhs = (MumpsScalar*)array;
1294 
1295     if (denseB) {
1296       /* copy B to X */
1297       ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
1298       ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
1299       ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
1300     } else { /* sparse B */
1301       ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
1302       ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1303       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1304       ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
1305       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1306     }
1307     /* handle condensation step of Schur complement (if any) */
1308     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1309       second_solve = PETSC_TRUE;
1310       ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
1311     }
1312     /* solve phase */
1313     /*-------------*/
1314     mumps->id.job = JOB_SOLVE;
1315     PetscMUMPS_c(mumps);
1316     if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1317 
1318     /* handle expansion step of Schur complement (if any) */
1319     if (second_solve) {
1320       ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
1321     }
1322     if (!denseB) { /* sparse B */
1323       ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
1324       ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1325       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1326     }
1327     ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
1328     PetscFunctionReturn(0);
1329   }
1330 
1331   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1332   if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1333 
1334   /* create msol_loc to hold mumps local solution */
1335   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1336   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1337 
1338   lsol_loc  = mumps->id.lsol_loc;
1339   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1340   ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
1341   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1342   mumps->id.isol_loc = isol_loc;
1343 
1344   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);
1345 
1346   if (denseB) {
1347     if (mumps->ICNTL20 == 10) {
1348       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1349       ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
1350       ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
1351       ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
1352       ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
1353       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
1354     } else {
1355       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1356       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented as an
1357         MPI_Gather, which results in very inefficient communication. An optimization is to use VecScatterCreateToZero to
1358         gather B to rank 0, and then on rank 0 re-arrange B into the desired order, which is a purely local operation.
1359         A sketch of this idea follows. */
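      /* Hedged sketch (not compiled) of the optimization described above, using only standard PETSc calls.
         Here "bflat" stands for the v_mpi vector created below, which wraps B's local dense array:

           VecScatter tozero;
           Vec        b0;                                   // sequential vector living on rank 0
           ierr = VecScatterCreateToZero(bflat,&tozero,&b0);CHKERRQ(ierr);
           ierr = VecScatterBegin(tozero,bflat,b0,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
           ierr = VecScatterEnd(tozero,bflat,b0,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
           // On rank 0, b0 now holds the per-rank blocks of B back to back; permuting them into the
           // column-major M x nrhs layout expected by mumps->id.rhs is then a purely local operation.
           ierr = VecScatterDestroy(&tozero);CHKERRQ(ierr);
           ierr = VecDestroy(&b0);CHKERRQ(ierr);
      */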
1360 
1361       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1362       /* wrap dense rhs matrix B into a vector v_mpi */
1363       ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
1364       ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
1365       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
1366       ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);
1367 
1368       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1369       if (!mumps->myid) {
1370         PetscInt *idx;
1371         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1372         ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
1373         ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
1374         k = 0;
1375         for (proc=0; proc<mumps->petsc_size; proc++){
1376           for (j=0; j<nrhs; j++){
1377             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1378           }
1379         }
1380 
1381         ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
1382         ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
1383         ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
1384       } else {
1385         ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
1386         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
1387         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
1388       }
1389       ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
1390       ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1391       ierr = ISDestroy(&is_to);CHKERRQ(ierr);
1392       ierr = ISDestroy(&is_from);CHKERRQ(ierr);
1393       ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1394 
1395       if (!mumps->myid) { /* define rhs on the host */
1396         ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
1397         mumps->id.rhs = (MumpsScalar*)bray;
1398         ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
1399       }
1400     }
1401   } else { /* sparse B */
1402     b = (Mat_MPIAIJ*)Bt->data;
1403 
1404     /* wrap dense X into a vector v_mpi */
1405     ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
1406     ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
1407     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
1408     ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);
1409 
1410     if (!mumps->myid) {
1411       ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
1412       ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1413       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1414       ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
1415       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1416     } else {
1417       mumps->id.irhs_ptr    = NULL;
1418       mumps->id.irhs_sparse = NULL;
1419       mumps->id.nz_rhs      = 0;
1420       mumps->id.rhs_sparse  = NULL;
1421     }
1422   }
1423 
1424   /* solve phase */
1425   /*-------------*/
1426   mumps->id.job = JOB_SOLVE;
1427   PetscMUMPS_c(mumps);
1428   if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1429 
1430   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1431   ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
1432   ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);
1433 
1434   /* create scatter scat_sol */
1435   ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
1436   /* iidx: index for scatter mumps solution to petsc X */
1437 
1438   ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
1439   ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
1440   for (i=0; i<lsol_loc; i++) {
1441     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1442 
1443     for (proc=0; proc<mumps->petsc_size; proc++){
1444       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1445         myrstart = rstart[proc];
1446         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1447         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1448         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1449         break;
1450       }
1451     }
1452 
1453     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1454   }
1455   ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
1456   ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
1457   ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1458   ierr = ISDestroy(&is_from);CHKERRQ(ierr);
1459   ierr = ISDestroy(&is_to);CHKERRQ(ierr);
1460   ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1461   ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
1462 
1463   /* free spaces */
1464   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1465   mumps->id.isol_loc = isol_loc_save;
1466 
1467   ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
1468   ierr = PetscFree(idxx);CHKERRQ(ierr);
1469   ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
1470   ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
1471   if (!denseB) {
1472     if (!mumps->myid) {
1473       b = (Mat_MPIAIJ*)Bt->data;
1474       ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
1475       ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1476       if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1477     }
1478   } else {
1479     if (mumps->ICNTL20 == 0) {
1480       ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
1481       ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
1482     }
1483   }
1484   ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
1485   ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1490 {
1491   PetscErrorCode ierr;
1492   PetscBool      flg;
1493   Mat            B;
1494 
1495   PetscFunctionBegin;
1496   ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
1497   if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1498 
1499   /* Create B=Bt^T that uses Bt's data structure */
1500   ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
1501 
1502   ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
1503   ierr = MatDestroy(&B);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
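
/*
   Hedged usage sketch (not part of this interface): how an application typically reaches
   MatMatSolve_MUMPS()/MatMatTransposeSolve_MUMPS() through the public API. Names such as
   "comm", "A", "B", "Bt" and "X" are placeholders supplied by the caller.

     KSP ksp; PC pc; Mat F;
     ierr = KSPCreate(comm,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr);
     ierr = KSPSetType(ksp,KSPPREONLY);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
     ierr = PCFactorSetMatSolverType(pc,MATSOLVERMUMPS);CHKERRQ(ierr);
     ierr = KSPSetUp(ksp);CHKERRQ(ierr);                     // triggers the symbolic + numeric factorization
     ierr = PCFactorGetMatrix(pc,&F);CHKERRQ(ierr);
     ierr = MatMatSolve(F,B,X);CHKERRQ(ierr);                // dense B, dense X
     ierr = MatMatTransposeSolve(F,Bt,X);CHKERRQ(ierr);      // sparse AIJ Bt holding B^T (see the COLUMN-storage note above)
*/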
1506 
1507 #if !defined(PETSC_USE_COMPLEX)
1508 /*
1509   input:
1510    F:        numeric factor
1511   output:
1512    nneg:     total number of negative pivots
1513    nzero:    total number of zero pivots
1514    npos:     (global dimension of F) - nneg - nzero
1515 */
1516 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1517 {
1518   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1519   PetscErrorCode ierr;
1520   PetscMPIInt    size;
1521 
1522   PetscFunctionBegin;
1523   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
1524   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1525   if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));
1526 
1527   if (nneg) *nneg = mumps->id.INFOG(12);
1528   if (nzero || npos) {
1529     if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1530     if (nzero) *nzero = mumps->id.INFOG(28);
1531     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 #endif
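
/*
   Hedged usage sketch for the inertia computation above (assumptions: a symmetric matrix factored with
   Cholesky through MUMPS, the options -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1 set as required by the
   checks in MatGetInertia_SBAIJMUMPS(), and F obtained from PCFactorGetMatrix()):

     PetscInt nneg,nzero,npos;
     ierr = MatGetInertia(F,&nneg,&nzero,&npos);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"inertia: %D negative, %D zero, %D positive\n",nneg,nzero,npos);CHKERRQ(ierr);
*/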
1536 
1537 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1538 {
1539   PetscErrorCode ierr;
1540   PetscInt       i,nreqs;
1541   PetscMUMPSInt  *irn,*jcn;
1542   PetscMPIInt    count;
1543   PetscInt64     totnnz,remain;
1544   const PetscInt osize=mumps->omp_comm_size;
1545   PetscScalar    *val;
1546 
1547   PetscFunctionBegin;
1548   if (osize > 1) {
1549     if (reuse == MAT_INITIAL_MATRIX) {
1550       /* master first gathers counts of nonzeros to receive */
1551       if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
1552       ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr);
1553 
1554       /* Then each computes number of send/recvs */
1555       if (mumps->is_omp_master) {
1556         /* Start from 1 since self communication is not done in MPI */
1557         nreqs = 0;
1558         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1559       } else {
1560         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1561       }
1562       ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val separately */
1563 
1564       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1565          MPI_Gatherv would suffice if it supported counts larger than 2^31-1. Since it does not, and mumps->nnz
1566          may well exceed 2^31-1, we have to slice the messages. Because omp_comm_size is very small, the current
1567          approach should have no extra overhead compared to MPI_Gatherv.
1568        */
1569       nreqs = 0; /* counter for actual send/recvs */
1570       if (mumps->is_omp_master) {
1571         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1572         ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
1573         ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);
1574 
1575         /* Self communication */
1576         ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
1577         ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
1578         ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);
1579 
1580         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1581         ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
1582         ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
1583         mumps->nnz = totnnz;
1584         mumps->irn = irn;
1585         mumps->jcn = jcn;
1586         mumps->val = mumps->val_alloc = val;
1587 
1588         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1589         jcn += mumps->recvcount[0];
1590         val += mumps->recvcount[0];
1591 
1592         /* Remote communication */
1593         for (i=1; i<osize; i++) {
1594           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1595           remain = mumps->recvcount[i] - count;
1596           while (count>0) {
1597             ierr    = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1598             ierr    = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1599             ierr    = MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1600             irn    += count;
1601             jcn    += count;
1602             val    += count;
1603             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1604             remain -= count;
1605           }
1606         }
1607       } else {
1608         irn    = mumps->irn;
1609         jcn    = mumps->jcn;
1610         val    = mumps->val;
1611         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1612         remain = mumps->nnz - count;
1613         while (count>0) {
1614           ierr    = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1615           ierr    = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1616           ierr    = MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1617           irn    += count;
1618           jcn    += count;
1619           val    += count;
1620           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1621           remain -= count;
1622         }
1623       }
1624     } else {
1625       nreqs = 0;
1626       if (mumps->is_omp_master) {
1627         val = mumps->val + mumps->recvcount[0];
1628         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1629           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1630           remain = mumps->recvcount[i] - count;
1631           while (count>0) {
1632             ierr    = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1633             val    += count;
1634             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1635             remain -= count;
1636           }
1637         }
1638       } else {
1639         val    = mumps->val;
1640         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1641         remain = mumps->nnz - count;
1642         while (count>0) {
1643           ierr    = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1644           val    += count;
1645           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1646           remain -= count;
1647         }
1648       }
1649     }
1650     ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
1651     mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
1652   }
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1657 {
1658   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1659   PetscErrorCode ierr;
1660   PetscBool      isMPIAIJ;
1661 
1662   PetscFunctionBegin;
1663   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 means an empty matrix, which is not an error */
1664     if (mumps->id.INFOG(1) == -6) {
1665       ierr = PetscInfo2(A,"MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1666     }
1667     ierr = PetscInfo2(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1668     PetscFunctionReturn(0);
1669   }
1670 
1671   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
1672   ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);
1673 
1674   /* numerical factorization phase */
1675   /*-------------------------------*/
1676   mumps->id.job = JOB_FACTNUMERIC;
1677   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1678     if (!mumps->myid) {
1679       mumps->id.a = (MumpsScalar*)mumps->val;
1680     }
1681   } else {
1682     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1683   }
1684   PetscMUMPS_c(mumps);
1685   if (mumps->id.INFOG(1) < 0) {
1686     if (A->erroriffailure) {
1687       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1688     } else {
1689       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1690         ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1691         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1692       } else if (mumps->id.INFOG(1) == -13) {
1693         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1694         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1695       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1696         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1697         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1698       } else {
1699         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1700         F->factorerrortype = MAT_FACTOR_OTHER;
1701       }
1702     }
1703   }
1704   if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));
1705 
1706   F->assembled    = PETSC_TRUE;
1707   mumps->matstruc = SAME_NONZERO_PATTERN;
1708   if (F->schur) { /* reset Schur status to unfactored */
1709 #if defined(PETSC_HAVE_CUDA)
1710     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1711 #endif
1712     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1713       mumps->id.ICNTL(19) = 2;
1714       ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
1715     }
1716     ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
1717   }
1718 
1719   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1720   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1721 
1722   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1723   if (mumps->petsc_size > 1) {
1724     PetscInt    lsol_loc;
1725     PetscScalar *sol_loc;
1726 
1727     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);
1728 
1729     /* distributed solution; Create x_seq=sol_loc for repeated use */
1730     if (mumps->x_seq) {
1731       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1732       ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
1733       ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
1734     }
1735     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1736     ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
1737     mumps->id.lsol_loc = lsol_loc;
1738     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1739     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
1740   }
1741   ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
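
/*
   Hedged sketch: when A->erroriffailure is false, the factorization failures recorded above in
   F->factorerrortype can be queried by the caller through the public API, e.g.

     MatFactorError err;
     ierr = MatFactorGetError(F,&err);CHKERRQ(ierr);
     if (err == MAT_FACTOR_NUMERIC_ZEROPIVOT) {
       // handle the numerically singular case instead of aborting
     }
*/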
1744 
1745 /* Sets MUMPS options from the options database */
1746 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1747 {
1748   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1749   PetscErrorCode ierr;
1750   PetscMUMPSInt  icntl=0;
1751   PetscInt       info[80],i,ninfo=80;
1752   PetscBool      flg=PETSC_FALSE;
1753 
1754   PetscFunctionBegin;
1755   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
1756   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
1757   if (flg) mumps->id.ICNTL(1) = icntl;
1758   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
1759   if (flg) mumps->id.ICNTL(2) = icntl;
1760   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
1761   if (flg) mumps->id.ICNTL(3) = icntl;
1762 
1763   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
1764   if (flg) mumps->id.ICNTL(4) = icntl;
1765   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1766 
1767   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
1768   if (flg) mumps->id.ICNTL(6) = icntl;
1769 
1770   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=Petsc (sequential only), 3=Scotch, 4=PORD, 5=Metis, 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
1771   if (flg) {
1772     if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not supported by the PETSc/MUMPS interface for parallel matrices\n");
1773     else mumps->id.ICNTL(7) = icntl;
1774   }
1775 
1776   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
1777   /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */
1778   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
1779   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
1780   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
1781   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
1782   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
1783   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
1784   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
1785     ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
1786     ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
1787   }
1788 
1789   /* MPICH Fortran MPI_IN_PLACE binding has a bug that prevents the use of 'mpi4py + mpich + mumps', e.g., by Firedrake.
1790      So we turn off distributed RHS for MPICH. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1791      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1792    */
1793 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) && defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
1794   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1795 #else
1796   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1797 #endif
1798   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr);
1799   if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10\n",(int)mumps->ICNTL20);
1800 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1801   if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n");
1802 #endif
1803   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */
1804 
1805   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
1806   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
1807   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
1808   if (mumps->id.ICNTL(24)) {
1809     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1810   }
1811 
1812   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
1813   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
1814   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
1815   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
1816   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
1817   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
1818   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
1819   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
1820   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
1821   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
1822   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
1823   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
1824 
1825   ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
1826   ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
1827   ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
1828   ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
1829   ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
1830   ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);
1831 
1832   ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr);
1833 
1834   ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
1835   if (ninfo) {
1836     if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must be <= 80\n",ninfo);
1837     ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
1838     mumps->ninfo = ninfo;
1839     for (i=0; i<ninfo; i++) {
1840       if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]);
1841       else  mumps->info[i] = info[i];
1842     }
1843   }
1844 
1845   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1846   PetscFunctionReturn(0);
1847 }
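
/*
   Hedged example of exercising the options registered above from the command line (the executable
   name and the particular values are placeholders):

     ./app -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_14 40 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12 -ksp_view
*/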
1848 
1849 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1850 {
1851   PetscErrorCode ierr;
1852   PetscInt       nthreads=0;
1853   MPI_Comm       newcomm=MPI_COMM_NULL;
1854 
1855   PetscFunctionBegin;
1856   mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1857   ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr);
1858   ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */
1859 
1860   ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
1861   if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1862   ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
1863   if (mumps->use_petsc_omp_support) {
1864 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1865     ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
1866     ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
1867 #else
1868     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n");
1869 #endif
1870   } else {
1871     mumps->omp_comm      = PETSC_COMM_SELF;
1872     mumps->mumps_comm    = mumps->petsc_comm;
1873     mumps->is_omp_master = PETSC_TRUE;
1874   }
1875   ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr);
1876   mumps->reqs = NULL;
1877   mumps->tag  = 0;
1878 
1879   /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1880   if (mumps->mumps_comm != MPI_COMM_NULL) {
1881     ierr = MPI_Comm_dup(mumps->mumps_comm,&newcomm);CHKERRQ(ierr);
1882     mumps->mumps_comm = newcomm;
1883   }
1884 
1885   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1886   mumps->id.job = JOB_INIT;
1887   mumps->id.par = 1;  /* host participates in factorization and solve */
1888   mumps->id.sym = mumps->sym;
1889 
1890   PetscMUMPS_c(mumps);
1891   if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
1892 
1893   /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1894      For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1895    */
1896   ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */
1897   ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr);
1898 
1899   mumps->scat_rhs = NULL;
1900   mumps->scat_sol = NULL;
1901 
1902   /* set PETSc-MUMPS default options - override MUMPS default */
1903   mumps->id.ICNTL(3) = 0;
1904   mumps->id.ICNTL(4) = 0;
1905   if (mumps->petsc_size == 1) {
1906     mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1907     mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1908   } else {
1909     mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1910     mumps->id.ICNTL(21) = 1;   /* distributed solution */
1911   }
1912 
1913   /* schur */
1914   mumps->id.size_schur    = 0;
1915   mumps->id.listvar_schur = NULL;
1916   mumps->id.schur         = NULL;
1917   mumps->sizeredrhs       = 0;
1918   mumps->schur_sol        = NULL;
1919   mumps->schur_sizesol    = 0;
1920   PetscFunctionReturn(0);
1921 }
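
/*
   Hedged usage note for the -mat_mumps_use_omp_threads path above: with PETSc configured with
   --with-openmp --download-hwloc (or --with-hwloc), one may run, e.g.,

     mpiexec -n 16 ./app -mat_mumps_use_omp_threads 4 ...

   so that the 16 MPI ranks are grouped and MUMPS itself runs on the 4 group masters, each using 4
   OpenMP threads; the remaining ranks feed their matrix entries to their group master via
   MatMumpsGatherNonzerosOnMaster(). See the MATSOLVERMUMPS manual page for details.
*/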
1922 
1923 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1924 {
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   if (mumps->id.INFOG(1) < 0) {
1929     if (A->erroriffailure) {
1930       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1931     } else {
1932       if (mumps->id.INFOG(1) == -6) {
1933         ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1934         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1935       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1936         ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1937         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1938       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1939         ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
1940       } else {
1941         ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1942         F->factorerrortype = MAT_FACTOR_OTHER;
1943       }
1944     }
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
1950 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1951 {
1952   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1953   PetscErrorCode ierr;
1954   Vec            b;
1955   const PetscInt M = A->rmap->N;
1956 
1957   PetscFunctionBegin;
1958   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
1959 
1960   /* Set MUMPS options from the options database */
1961   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
1962 
1963   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
1964   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
1965 
1966   /* analysis phase */
1967   /*----------------*/
1968   mumps->id.job = JOB_FACTSYMBOLIC;
1969   mumps->id.n   = M;
1970   switch (mumps->id.ICNTL(18)) {
1971   case 0:  /* centralized assembled matrix input */
1972     if (!mumps->myid) {
1973       mumps->id.nnz = mumps->nnz;
1974       mumps->id.irn = mumps->irn;
1975       mumps->id.jcn = mumps->jcn;
1976       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1977       if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */
1978         if (!mumps->myid) {
1979           const PetscInt *idx;
1980           PetscInt       i;
1981 
1982           ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr);
1983           ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);
1984           for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */
1985           ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
1986         }
1987       }
1988     }
1989     break;
1990   case 3:  /* distributed assembled matrix input (size>1) */
1991     mumps->id.nnz_loc = mumps->nnz;
1992     mumps->id.irn_loc = mumps->irn;
1993     mumps->id.jcn_loc = mumps->jcn;
1994     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1995     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1996       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
1997       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
1998       ierr = VecDestroy(&b);CHKERRQ(ierr);
1999     }
2000     break;
2001   }
2002   PetscMUMPS_c(mumps);
2003   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2004 
2005   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2006   F->ops->solve           = MatSolve_MUMPS;
2007   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2008   F->ops->matsolve        = MatMatSolve_MUMPS;
2009   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2010   PetscFunctionReturn(0);
2011 }
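
/*
   Hedged example for the ICNTL(7)==1 branch above, assuming the default PCLU setup path passes the
   PETSc-computed ordering to this routine as r (sequential runs only, as checked in
   PetscSetMUMPSFromOptions()):

     ./app -pc_type lu -pc_factor_mat_solver_type mumps -pc_factor_mat_ordering_type nd -mat_mumps_icntl_7 1

   The ordering IS selected by -pc_factor_mat_ordering_type is what ends up in mumps->id.perm_in.
*/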
2012 
2013 /* Note the Petsc r and c permutations are ignored */
2014 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2015 {
2016   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2017   PetscErrorCode ierr;
2018   Vec            b;
2019   const PetscInt M = A->rmap->N;
2020 
2021   PetscFunctionBegin;
2022   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2023 
2024   /* Set MUMPS options from the options database */
2025   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2026 
2027   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2028   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2029 
2030   /* analysis phase */
2031   /*----------------*/
2032   mumps->id.job = JOB_FACTSYMBOLIC;
2033   mumps->id.n   = M;
2034   switch (mumps->id.ICNTL(18)) {
2035   case 0:  /* centralized assembled matrix input */
2036     if (!mumps->myid) {
2037       mumps->id.nnz = mumps->nnz;
2038       mumps->id.irn = mumps->irn;
2039       mumps->id.jcn = mumps->jcn;
2040       if (mumps->id.ICNTL(6)>1) {
2041         mumps->id.a = (MumpsScalar*)mumps->val;
2042       }
2043     }
2044     break;
2045   case 3:  /* distributed assembled matrix input (size>1) */
2046     mumps->id.nnz_loc = mumps->nnz;
2047     mumps->id.irn_loc = mumps->irn;
2048     mumps->id.jcn_loc = mumps->jcn;
2049     if (mumps->id.ICNTL(6)>1) {
2050       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2051     }
2052     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2053       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2054       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2055       ierr = VecDestroy(&b);CHKERRQ(ierr);
2056     }
2057     break;
2058   }
2059   PetscMUMPS_c(mumps);
2060   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2061 
2062   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2063   F->ops->solve           = MatSolve_MUMPS;
2064   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /* Note the Petsc r permutation and factor info are ignored */
2069 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2070 {
2071   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2072   PetscErrorCode ierr;
2073   Vec            b;
2074   const PetscInt M = A->rmap->N;
2075 
2076   PetscFunctionBegin;
2077   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2078 
2079   /* Set MUMPS options from the options database */
2080   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2081 
2082   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2083   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2084 
2085   /* analysis phase */
2086   /*----------------*/
2087   mumps->id.job = JOB_FACTSYMBOLIC;
2088   mumps->id.n   = M;
2089   switch (mumps->id.ICNTL(18)) {
2090   case 0:  /* centralized assembled matrix input */
2091     if (!mumps->myid) {
2092       mumps->id.nnz = mumps->nnz;
2093       mumps->id.irn = mumps->irn;
2094       mumps->id.jcn = mumps->jcn;
2095       if (mumps->id.ICNTL(6)>1) {
2096         mumps->id.a = (MumpsScalar*)mumps->val;
2097       }
2098     }
2099     break;
2100   case 3:  /* distributed assembled matrix input (size>1) */
2101     mumps->id.nnz_loc = mumps->nnz;
2102     mumps->id.irn_loc = mumps->irn;
2103     mumps->id.jcn_loc = mumps->jcn;
2104     if (mumps->id.ICNTL(6)>1) {
2105       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2106     }
2107     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2108       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2109       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2110       ierr = VecDestroy(&b);CHKERRQ(ierr);
2111     }
2112     break;
2113   }
2114   PetscMUMPS_c(mumps);
2115   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2116 
2117   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2118   F->ops->solve                 = MatSolve_MUMPS;
2119   F->ops->solvetranspose        = MatSolve_MUMPS;
2120   F->ops->matsolve              = MatMatSolve_MUMPS;
2121   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2122 #if defined(PETSC_USE_COMPLEX)
2123   F->ops->getinertia = NULL;
2124 #else
2125   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2126 #endif
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2131 {
2132   PetscErrorCode    ierr;
2133   PetscBool         iascii;
2134   PetscViewerFormat format;
2135   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2136 
2137   PetscFunctionBegin;
2138   /* check if matrix is mumps type */
2139   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2140 
2141   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
2142   if (iascii) {
2143     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
2144     if (format == PETSC_VIEWER_ASCII_INFO) {
2145       ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
2146       ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d \n",mumps->id.sym);CHKERRQ(ierr);
2147       ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d \n",mumps->id.par);CHKERRQ(ierr);
2148       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr);
2149       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr);
2150       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr);
2151       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr);
2152       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr);
2153       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr);
2154       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering): %d \n",mumps->id.ICNTL(7));CHKERRQ(ierr);
2155       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr);
2156       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr);
2157       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr);
2158       if (mumps->id.ICNTL(11)>0) {
2159         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
2160         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
2161         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
2162         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
2163         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
2164         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
2165       }
2166       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr);
2167       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr);
2168       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr);
2169       /* ICNTL(15-17) not used */
2170       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr);
2171       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr);
2172       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr);
2173       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr);
2174       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr);
2175       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr);
2176 
2177       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr);
2178       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr);
2179       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr);
2180       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr);
2181       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr);
2182       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr);
2183 
2184       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr);
2185       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors are discarded in the solve phase):   %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr);
2186       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr);
2187       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr);
2188       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr);
2189       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr);
2190 
2191       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
2192       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
2193       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
2194       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
2195       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
2196       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);
2197 
2198       /* information local to each processor */
2199       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
2200       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
2201       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
2202       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2203       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
2204       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
2205       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2206       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
2207       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
2208       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2209 
2210       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running the numerical factorization): \n");CHKERRQ(ierr);
2211       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
2212       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2213 
2214       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during the numerical factorization): \n");CHKERRQ(ierr);
2215       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
2216       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2217 
2218       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
2219       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
2220       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2221 
2222       if (mumps->ninfo && mumps->ninfo <= 80){
2223         PetscInt i;
2224         for (i=0; i<mumps->ninfo; i++){
2225           ierr = PetscViewerASCIIPrintf(viewer, "  INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
2226           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
2227           ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2228         }
2229       }
2230       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
2231 
2232       if (!mumps->myid) { /* information from the host */
2233         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
2234         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
2235         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
2236         ierr = PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);
2237 
2238         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr);
2239         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr);
2240         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr);
2241         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr);
2242         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr);
2243         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr);
2244         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr);
2245         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr);
2246         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr);
2247         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr);
2248         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr);
2249         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr);
2250         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr);
2251         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr);
2252         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr);
2253         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr);
2254         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr);
2255         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr);
2256         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr);
2257         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr);
2258         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr);
2259         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr);
2260         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr);
2261         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
2262         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
2263         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
2264         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
2265         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
2266         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
2267         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
2268         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr);
2269         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr);
2270         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr);
2271         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr);
2272       }
2273     }
2274   }
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2279 {
2280   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2281 
2282   PetscFunctionBegin;
2283   info->block_size        = 1.0;
2284   info->nz_allocated      = mumps->id.INFOG(20);
2285   info->nz_used           = mumps->id.INFOG(20);
2286   info->nz_unneeded       = 0.0;
2287   info->assemblies        = 0.0;
2288   info->mallocs           = 0.0;
2289   info->memory            = 0.0;
2290   info->fill_ratio_given  = 0;
2291   info->fill_ratio_needed = 0;
2292   info->factor_mallocs    = 0;
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 /* -------------------------------------------------------------------------------------------*/
2297 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2298 {
2299   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2300   const PetscScalar *arr;
2301   const PetscInt    *idxs;
2302   PetscInt          size,i;
2303   PetscErrorCode    ierr;
2304 
2305   PetscFunctionBegin;
2306   ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
2307   if (mumps->petsc_size > 1) {
2308     PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2309 
2310     ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2311     ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
2312     if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
2313   }
2314 
2315   /* Schur complement matrix */
2316   ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
2317   ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
2318   ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
2319   mumps->id.schur      = (MumpsScalar*)arr;
2320   mumps->id.size_schur = size;
2321   mumps->id.schur_lld  = size;
2322   ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
2323   if (mumps->sym == 1) {
2324     ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
2325   }
2326 
2327   /* MUMPS expects Fortran style indices */
2328   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
2329   ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
2330   ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
2331   for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
2332   ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
2333   if (mumps->petsc_size > 1) {
2334     mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2335   } else {
2336     if (F->factortype == MAT_FACTOR_LU) {
2337       mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2338     } else {
2339       mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2340     }
2341   }
2342   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2343   mumps->id.ICNTL(26) = -1;
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 /* -------------------------------------------------------------------------------------------*/
2348 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2349 {
2350   Mat            St;
2351   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2352   PetscScalar    *array;
2353 #if defined(PETSC_USE_COMPLEX)
2354   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2355 #endif
2356   PetscErrorCode ierr;
2357 
2358   PetscFunctionBegin;
2359   if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2360   ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
2361   ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
2362   ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
2363   ierr = MatSetUp(St);CHKERRQ(ierr);
2364   ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
2365   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2366     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2367       PetscInt i,j,N=mumps->id.size_schur;
2368       for (i=0;i<N;i++) {
2369         for (j=0;j<N;j++) {
2370 #if !defined(PETSC_USE_COMPLEX)
2371           PetscScalar val = mumps->id.schur[i*N+j];
2372 #else
2373           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2374 #endif
2375           array[j*N+i] = val;
2376         }
2377       }
2378     } else { /* stored by columns */
2379       ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
2380     }
2381   } else { /* either full or lower-triangular (not packed) */
2382     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2383       PetscInt i,j,N=mumps->id.size_schur;
2384       for (i=0;i<N;i++) {
2385         for (j=i;j<N;j++) {
2386 #if !defined(PETSC_USE_COMPLEX)
2387           PetscScalar val = mumps->id.schur[i*N+j];
2388 #else
2389           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2390 #endif
2391           array[i*N+j] = val;
2392           array[j*N+i] = val;
2393         }
2394       }
2395     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2396       ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
2397     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2398       PetscInt i,j,N=mumps->id.size_schur;
2399       for (i=0;i<N;i++) {
2400         for (j=0;j<i+1;j++) {
2401 #if !defined(PETSC_USE_COMPLEX)
2402           PetscScalar val = mumps->id.schur[i*N+j];
2403 #else
2404           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2405 #endif
2406           array[i*N+j] = val;
2407           array[j*N+i] = val;
2408         }
2409       }
2410     }
2411   }
2412   ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
2413   *S   = St;
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 /* -------------------------------------------------------------------------------------------*/
2418 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2419 {
2420   PetscErrorCode ierr;
2421   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2422 
2423   PetscFunctionBegin;
2424   ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2429 {
2430   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2431 
2432   PetscFunctionBegin;
2433   *ival = mumps->id.ICNTL(icntl);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 /*@
2438   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2439 
2440    Logically Collective on Mat
2441 
2442    Input Parameters:
2443 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2444 .  icntl - index of MUMPS parameter array ICNTL()
2445 -  ival - value of MUMPS ICNTL(icntl)
2446 
2447   Options Database:
2448 .   -mat_mumps_icntl_<icntl> <ival>
2449 
2450    Level: beginner
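
   Example (a minimal sketch; F is the factored matrix described above, and setting ICNTL(4), the MUMPS print level, to 2 is just an illustration):
$     MatMumpsSetIcntl(F,4,2);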
2451 
2452    References:
2453 .     MUMPS Users' Guide
2454 
2455 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2456  @*/
2457 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2458 {
2459   PetscErrorCode ierr;
2460 
2461   PetscFunctionBegin;
2462   PetscValidType(F,1);
2463   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2464   PetscValidLogicalCollectiveInt(F,icntl,2);
2465   PetscValidLogicalCollectiveInt(F,ival,3);
2466   ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 /*@
2471   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2472 
2473    Logically Collective on Mat
2474 
2475    Input Parameters:
2476 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2477 -  icntl - index of MUMPS parameter array ICNTL()
2478 
2479   Output Parameter:
2480 .  ival - value of MUMPS ICNTL(icntl)
2481 
2482    Level: beginner
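
   Example (a minimal sketch; F is the factored matrix described above; ICNTL(24) controls null-pivot detection):
$     PetscInt ival;
$     MatMumpsGetIcntl(F,24,&ival);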
2483 
2484    References:
2485 .     MUMPS Users' Guide
2486 
2487 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2488 @*/
2489 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2490 {
2491   PetscErrorCode ierr;
2492 
2493   PetscFunctionBegin;
2494   PetscValidType(F,1);
2495   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2496   PetscValidLogicalCollectiveInt(F,icntl,2);
2497   PetscValidIntPointer(ival,3);
2498   ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 /* -------------------------------------------------------------------------------------------*/
2503 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2504 {
2505   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2506 
2507   PetscFunctionBegin;
2508   mumps->id.CNTL(icntl) = val;
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2513 {
2514   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2515 
2516   PetscFunctionBegin;
2517   *val = mumps->id.CNTL(icntl);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 /*@
2522   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2523 
2524    Logically Collective on Mat
2525 
2526    Input Parameters:
2527 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2528 .  icntl - index of MUMPS parameter array CNTL()
2529 -  val - value of MUMPS CNTL(icntl)
2530 
2531   Options Database:
2532 .   -mat_mumps_cntl_<icntl> <val>
2533 
2534    Level: beginner
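
   Example (a minimal sketch; F is the factored matrix described above; CNTL(1) is the relative pivoting threshold and the value shown is illustrative):
$     MatMumpsSetCntl(F,1,0.01);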
2535 
2536    References:
2537 .     MUMPS Users' Guide
2538 
2539 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2540 @*/
2541 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2542 {
2543   PetscErrorCode ierr;
2544 
2545   PetscFunctionBegin;
2546   PetscValidType(F,1);
2547   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2548   PetscValidLogicalCollectiveInt(F,icntl,2);
2549   PetscValidLogicalCollectiveReal(F,val,3);
2550   ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 /*@
2555   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2556 
2557    Logically Collective on Mat
2558 
2559    Input Parameters:
2560 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2561 -  icntl - index of MUMPS parameter array CNTL()
2562 
2563   Output Parameter:
2564 .  val - value of MUMPS CNTL(icntl)
2565 
2566    Level: beginner
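
   Example (a minimal sketch; F is the factored matrix described above; CNTL(3) is the absolute pivoting threshold):
$     PetscReal cntl3;
$     MatMumpsGetCntl(F,3,&cntl3);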
2567 
2568    References:
2569 .      MUMPS Users' Guide
2570 
2571 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2572 @*/
2573 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2574 {
2575   PetscErrorCode ierr;
2576 
2577   PetscFunctionBegin;
2578   PetscValidType(F,1);
2579   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2580   PetscValidLogicalCollectiveInt(F,icntl,2);
2581   PetscValidRealPointer(val,3);
2582   ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2587 {
2588   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2589 
2590   PetscFunctionBegin;
2591   *info = mumps->id.INFO(icntl);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2596 {
2597   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2598 
2599   PetscFunctionBegin;
2600   *infog = mumps->id.INFOG(icntl);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2605 {
2606   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2607 
2608   PetscFunctionBegin;
2609   *rinfo = mumps->id.RINFO(icntl);
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2614 {
2615   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2616 
2617   PetscFunctionBegin;
2618   *rinfog = mumps->id.RINFOG(icntl);
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2623 {
2624   PetscErrorCode ierr;
2625   Mat            Bt = NULL,Btseq = NULL;
2626   PetscBool      flg;
2627   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2628   PetscScalar    *aa;
2629   PetscInt       spnr,*ia,*ja,M,nrhs;
2630 
2631   PetscFunctionBegin;
2632   PetscValidHeaderSpecific(spRHS,MAT_CLASSID,2);
2633   ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
2634   if (flg) {
2635     ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
2636   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2637 
2638   ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);
2639 
2640   if (mumps->petsc_size > 1) {
2641     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2642     Btseq = b->A;
2643   } else {
2644     Btseq = Bt;
2645   }
2646 
2647   ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
2648   mumps->id.nrhs = nrhs;
2649   mumps->id.lrhs = M;
2650   mumps->id.rhs  = NULL;
2651 
2652   if (!mumps->myid) {
2653     ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
2654     ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2655     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2656     ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
2657     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2658   } else {
2659     mumps->id.irhs_ptr    = NULL;
2660     mumps->id.irhs_sparse = NULL;
2661     mumps->id.nz_rhs      = 0;
2662     mumps->id.rhs_sparse  = NULL;
2663   }
2664   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2665   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2666 
2667   /* solve phase */
2668   /*-------------*/
2669   mumps->id.job = JOB_SOLVE;
2670   PetscMUMPS_c(mumps);
2671   if (mumps->id.INFOG(1) < 0)
2672     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
2673 
2674   if (!mumps->myid) {
2675     ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
2676     ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2677     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2678   }
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 /*@
2683   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2684 
2685    Logically Collective on Mat
2686 
2687    Input Parameters:
2688 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2689 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the requested indices on process 0
2690 
2691   Output Parameter:
2692 . spRHS - requested entries of inverse of A
2693 
2694    Level: beginner
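
   Example (a minimal sketch; F is a MUMPS factor of A and spRHST is a sequential AIJ matrix on process 0 whose nonzero
   pattern marks the requested entries of inv(A), e.g. an identity matrix to request the diagonal; on return spRHS holds
   the requested entries):
$     Mat spRHS;
$     MatCreateTranspose(spRHST,&spRHS);
$     MatMumpsGetInverse(F,spRHS);
$     MatDestroy(&spRHS);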
2695 
2696    References:
2697 .      MUMPS Users' Guide
2698 
2699 .seealso: MatGetFactor(), MatCreateTranspose()
2700 @*/
2701 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2702 {
2703   PetscErrorCode ierr;
2704 
2705   PetscFunctionBegin;
2706   PetscValidType(F,1);
2707   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2708   ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
2709   PetscFunctionReturn(0);
2710 }
2711 
2712 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2713 {
2714   PetscErrorCode ierr;
2715   Mat            spRHS;
2716 
2717   PetscFunctionBegin;
2718   ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
2719   ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
2720   ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 /*@
2725   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2726 
2727    Logically Collective on Mat
2728 
2729    Input Parameters:
2730 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2731 -  spRHST - sequential sparse matrix in MATAIJ format holding the requested indices of A^T on process 0
2732 
2733   Output Parameter:
2734 . spRHST - requested entries of inverse of A^T
2735 
2736    Level: beginner
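
   Example (a minimal sketch; F is a MUMPS factor of A and spRHST is a sequential AIJ matrix on process 0 whose nonzero
   pattern marks the requested entries of inv(A^T)):
$     MatMumpsGetInverseTranspose(F,spRHST);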
2737 
2738    References:
2739 .      MUMPS Users' Guide
2740 
2741 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2742 @*/
2743 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2744 {
2745   PetscErrorCode ierr;
2746   PetscBool      flg;
2747 
2748   PetscFunctionBegin;
2749   PetscValidType(F,1);
2750   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2751   ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
2752   if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2753 
2754   ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
2755   PetscFunctionReturn(0);
2756 }
2757 
2758 /*@
2759   MatMumpsGetInfo - Get MUMPS parameter INFO()
2760 
2761    Logically Collective on Mat
2762 
2763    Input Parameters:
2764 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2765 -  icntl - index of MUMPS parameter array INFO()
2766 
2767   Output Parameter:
2768 .  ival - value of MUMPS INFO(icntl)
2769 
2770    Level: beginner
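
   Example (a minimal sketch; F is the factored matrix described above; INFO(15) is the estimated size, in MB, of the local MUMPS data for factorization):
$     PetscInt info15;
$     MatMumpsGetInfo(F,15,&info15);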
2771 
2772    References:
2773 .      MUMPS Users' Guide
2774 
2775 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2776 @*/
2777 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2778 {
2779   PetscErrorCode ierr;
2780 
2781   PetscFunctionBegin;
2782   PetscValidType(F,1);
2783   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2784   PetscValidIntPointer(ival,3);
2785   ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 /*@
2790   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2791 
2792    Logically Collective on Mat
2793 
2794    Input Parameters:
2795 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2796 -  icntl - index of MUMPS parameter array INFOG()
2797 
2798   Output Parameter:
2799 .  ival - value of MUMPS INFOG(icntl)
2800 
2801    Level: beginner
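
   Example (a minimal sketch; F is the factored matrix described above; a negative INFOG(1) indicates an error reported by MUMPS):
$     PetscInt infog1;
$     MatMumpsGetInfog(F,1,&infog1);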
2802 
2803    References:
2804 .      MUMPS Users' Guide
2805 
2806 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2807 @*/
2808 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2809 {
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   PetscValidType(F,1);
2814   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2815   PetscValidIntPointer(ival,3);
2816   ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 /*@
2821   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2822 
2823    Logically Collective on Mat
2824 
2825    Input Parameters:
2826 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2827 -  icntl - index of MUMPS parameter array RINFO()
2828 
2829   Output Parameter:
2830 .  val - value of MUMPS RINFO(icntl)
2831 
2832    Level: beginner
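
   Example (a minimal sketch; F is the factored matrix described above; RINFO(1) is the local estimated flop count for the elimination after analysis):
$     PetscReal rinfo1;
$     MatMumpsGetRinfo(F,1,&rinfo1);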
2833 
2834    References:
2835 .       MUMPS Users' Guide
2836 
2837 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2838 @*/
2839 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2840 {
2841   PetscErrorCode ierr;
2842 
2843   PetscFunctionBegin;
2844   PetscValidType(F,1);
2845   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2846   PetscValidRealPointer(val,3);
2847   ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 /*@
2852   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2853 
2854    Logically Collective on Mat
2855 
2856    Input Parameters:
2857 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2858 -  icntl - index of MUMPS parameter array RINFOG()
2859 
2860   Output Parameter:
2861 .  val - value of MUMPS RINFOG(icntl)
2862 
2863    Level: beginner
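
   Example (a minimal sketch; F is the factored matrix described above; RINFOG(3) is the global estimated flop count for the elimination after factorization):
$     PetscReal rinfog3;
$     MatMumpsGetRinfog(F,3,&rinfog3);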
2864 
2865    References:
2866 .      MUMPS Users' Guide
2867 
2868 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2869 @*/
2870 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2871 {
2872   PetscErrorCode ierr;
2873 
2874   PetscFunctionBegin;
2875   PetscValidType(F,1);
2876   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2877   PetscValidRealPointer(val,3);
2878   ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 /*MC
2883   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2884   distributed and sequential matrices via the external package MUMPS.
2885 
2886   Works with MATAIJ and MATSBAIJ matrices (LU factorization is also registered for MATBAIJ and MATSEQSELL)
2887 
2888   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2889 
2890   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the rest of the code runs in flat-MPI mode. See details below.
2891 
2892   Use -pc_type cholesky or -pc_type lu together with -pc_factor_mat_solver_type mumps to use this direct solver
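
  For example (a sketch; "./ex2" stands for any PETSc program that solves a linear system with KSP):
$     mpiexec -n 4 ./ex2 -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2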
2893 
2894   Options Database Keys:
2895 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2896 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2897 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2898 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2899 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2900 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=PETSc (sequential only), 3=Scotch, 4=PORD, 5=Metis
2901 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2902 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2903 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2904 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2905 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2906 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2907 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2908 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2909 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2910 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2911 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2912 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2913 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
2914 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2915 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2916 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2917 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2918 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2919 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2920 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2921 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2922 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2923 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2924 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2925 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2926 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2927 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2928 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2929                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2930 
2931    If run sequentially, the PETSc-provided orderings can be used with the option -mat_mumps_icntl_7 1
2932 
2933   Level: beginner
2934 
2935     Notes:
2936     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error.
2937 
2938     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2939 $          KSPGetPC(ksp,&pc);
2940 $          PCFactorGetMatrix(pc,&mat);
2941 $          MatMumpsGetInfo(mat,....);
2942 $          MatMumpsGetInfog(mat,....); etc.
2943            Or you can run with -ksp_error_if_not_converged, in which case the program stops and the MUMPS information is printed in the error message.
2944 
2945    Two modes to run MUMPS/PETSc with OpenMP
2946 
2947 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2948 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2949 
2950 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2951 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2952 
2953    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2954    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2955    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2956    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2957    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2958 
2959    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2960    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2961    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm
2962    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (the assignment may
2963    come from CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
2964    On a multi-socket compute node, MPI rank mapping matters. Continuing the above example, suppose the compute node has two sockets and MPI ranks are
2965    interleaved across them, that is, even ranks are placed on socket 0 and odd ranks on socket 1, with each rank bound to a core. Then with
2966    -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half of its cores on socket 0 and half on socket 1, which
2967    hurts locality. If instead MPI ranks are mapped consecutively onto the two sockets, the problem does not arise. Therefore, when you use
2968    -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2969    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2970    examine the mapping result.
2971 
2972    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2973    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2974    calls omp_set_num_threads(m) internally before calling MUMPS.
2975 
2976    References:
2977 +   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2978 -   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2979 
2980 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()
2981 
2982 M*/
2983 
2984 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2985 {
2986   PetscFunctionBegin;
2987   *type = MATSOLVERMUMPS;
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 /* MatGetFactor for Seq and MPI AIJ matrices */
2992 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2993 {
2994   Mat            B;
2995   PetscErrorCode ierr;
2996   Mat_MUMPS      *mumps;
2997   PetscBool      isSeqAIJ;
2998   PetscMPIInt    size;
2999 
3000   PetscFunctionBegin;
3001  #if defined(PETSC_USE_COMPLEX)
3002   if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3003  #endif
3004   /* Create the factorization matrix */
3005   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
3006   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3007   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3008   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3009   ierr = MatSetUp(B);CHKERRQ(ierr);
3010 
3011   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3012 
3013   B->ops->view    = MatView_MUMPS;
3014   B->ops->getinfo = MatGetInfo_MUMPS;
3015 
3016   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3018   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3019   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3021   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3022   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3026   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3028   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3029 
3030   if (ftype == MAT_FACTOR_LU) {
3031     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3032     B->factortype            = MAT_FACTOR_LU;
3033     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3034     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3035     mumps->sym = 0;
3036   } else {
3037     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3038     B->factortype                  = MAT_FACTOR_CHOLESKY;
3039     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3040     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3041 #if defined(PETSC_USE_COMPLEX)
3042     mumps->sym = 2;
3043 #else
3044     if (A->spd_set && A->spd) mumps->sym = 1;
3045     else                      mumps->sym = 2;
3046 #endif
3047   }
3048 
3049   /* set solvertype */
3050   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3051   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3052   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3053   if (size == 1) {
3054     /* MUMPS can use the PETSc ordering with "-mat_mumps_icntl_7 1" when running sequentially, so generate the ordering even though it may not be used */
3055     B->useordering = PETSC_TRUE;
3056   }
3057 
3058   B->ops->destroy = MatDestroy_MUMPS;
3059   B->data         = (void*)mumps;
3060 
3061   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3062 
3063   *F = B;
3064   PetscFunctionReturn(0);
3065 }
3066 
3067 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3068 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3069 {
3070   Mat            B;
3071   PetscErrorCode ierr;
3072   Mat_MUMPS      *mumps;
3073   PetscBool      isSeqSBAIJ;
3074   PetscMPIInt    size;
3075 
3076   PetscFunctionBegin;
3077  #if defined(PETSC_USE_COMPLEX)
3078   if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3079  #endif
3080   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3081   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3082   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3083   ierr = MatSetUp(B);CHKERRQ(ierr);
3084 
3085   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3086   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
3087   if (isSeqSBAIJ) {
3088     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3089   } else {
3090     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3091   }
3092 
3093   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3094   B->ops->view                   = MatView_MUMPS;
3095   B->ops->getinfo                = MatGetInfo_MUMPS;
3096 
3097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3107   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3110 
3111   B->factortype = MAT_FACTOR_CHOLESKY;
3112 #if defined(PETSC_USE_COMPLEX)
3113   mumps->sym = 2;
3114 #else
3115   if (A->spd_set && A->spd) mumps->sym = 1;
3116   else                      mumps->sym = 2;
3117 #endif
3118 
3119   /* set solvertype */
3120   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3121   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3122   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3123   if (size == 1) {
3124     /* MUMPS can use the PETSc ordering with "-mat_mumps_icntl_7 1" when running sequentially, so generate the ordering even though it may not be used */
3125     B->useordering = PETSC_TRUE;
3126   }
3127 
3128   B->ops->destroy = MatDestroy_MUMPS;
3129   B->data         = (void*)mumps;
3130 
3131   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3132 
3133   *F = B;
3134   PetscFunctionReturn(0);
3135 }
3136 
3137 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3138 {
3139   Mat            B;
3140   PetscErrorCode ierr;
3141   Mat_MUMPS      *mumps;
3142   PetscBool      isSeqBAIJ;
3143   PetscMPIInt    size;
3144 
3145   PetscFunctionBegin;
3146   /* Create the factorization matrix */
3147   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
3148   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3149   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3150   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3151   ierr = MatSetUp(B);CHKERRQ(ierr);
3152 
3153   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3154   if (ftype == MAT_FACTOR_LU) {
3155     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3156     B->factortype            = MAT_FACTOR_LU;
3157     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3158     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3159     mumps->sym = 0;
3160   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");
3161 
3162   B->ops->view        = MatView_MUMPS;
3163   B->ops->getinfo     = MatGetInfo_MUMPS;
3164 
3165   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3166   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3167   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3168   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3169   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3170   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3171   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3172   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3173   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3174   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3175   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3176   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3177   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3178 
3179   /* set solvertype */
3180   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3181   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3182   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3183   if (size == 1) {
3184     /* MUMPS can use the PETSc ordering with "-mat_mumps_icntl_7 1" when running sequentially, so generate the ordering even though it may not be used */
3185     B->useordering = PETSC_TRUE;
3186   }
3187 
3188   B->ops->destroy = MatDestroy_MUMPS;
3189   B->data         = (void*)mumps;
3190 
3191   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3192 
3193   *F = B;
3194   PetscFunctionReturn(0);
3195 }
3196 
3197 /* MatGetFactor for Seq and MPI SELL matrices */
3198 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3199 {
3200   Mat            B;
3201   PetscErrorCode ierr;
3202   Mat_MUMPS      *mumps;
3203   PetscBool      isSeqSELL;
3204   PetscMPIInt    size;
3205 
3206   PetscFunctionBegin;
3207   /* Create the factorization matrix */
3208   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
3209   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3210   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3211   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3212   ierr = MatSetUp(B);CHKERRQ(ierr);
3213 
3214   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3215 
3216   B->ops->view        = MatView_MUMPS;
3217   B->ops->getinfo     = MatGetInfo_MUMPS;
3218 
3219   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3220   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3221   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3222   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3223   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3224   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3225   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3226   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3227   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3228   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3229   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3230 
3231   if (ftype == MAT_FACTOR_LU) {
3232     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3233     B->factortype            = MAT_FACTOR_LU;
3234     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3235     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3236     mumps->sym = 0;
3237   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3238 
3239   /* set solvertype */
3240   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3241   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3242   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3243   if (size == 1) {
3244     /* MUMPS can use the PETSc ordering with "-mat_mumps_icntl_7 1" when running sequentially, so generate the ordering even though it may not be used */
3245     B->useordering = PETSC_TRUE;
3246   }
3247 
3248   B->ops->destroy = MatDestroy_MUMPS;
3249   B->data         = (void*)mumps;
3250 
3251   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3252 
3253   *F = B;
3254   PetscFunctionReturn(0);
3255 }
3256 
3257 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3258 {
3259   PetscErrorCode ierr;
3260 
3261   PetscFunctionBegin;
3262   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3263   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3264   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3265   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3266   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3267   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3268   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3269   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3270   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3271   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3272   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
3273   PetscFunctionReturn(0);
3274 }
3275 
3276