1
2 /*
3 Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48 naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
54 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55 #endif
56 #else
57 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
58 #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59 #endif
60 #endif
61
62 #define MPIU_MUMPSINT MPI_INT
63 #define PETSC_MUMPS_INT_MAX 2147483647
64 #define PETSC_MUMPS_INT_MIN -2147483648
65
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt * b)67 PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69 PetscFunctionBegin;
70 if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
71 *b = (PetscMUMPSInt)(a);
72 PetscFunctionReturn(0);
73 }
74
75 /* Put these utility routines here since they are only used in this file */
PetscOptionsMUMPSInt_Private(PetscOptionItems * PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt * value,PetscBool * set,PetscMUMPSInt lb,PetscMUMPSInt ub)76 PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
77 {
78 PetscErrorCode ierr;
79 PetscInt myval;
80 PetscBool myset;
81 PetscFunctionBegin;
82 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83 ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
84 if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
85 if (set) *set = myset;
86 PetscFunctionReturn(0);
87 }
88 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
89
90 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
91 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92 #define PetscMUMPS_c(mumps) \
93 do { \
94 if (mumps->use_petsc_omp_support) { \
95 if (mumps->is_omp_master) { \
96 ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
97 MUMPS_c(&mumps->id); \
98 ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
99 } \
100 ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
101 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \
102 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \
103 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
104 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
105 */ \
106 ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \
107 ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm);CHKERRQ(ierr); \
108 ierr = MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr); \
109 } else { \
110 MUMPS_c(&mumps->id); \
111 } \
112 } while (0)
113 #else
114 #define PetscMUMPS_c(mumps) \
115 do { MUMPS_c(&mumps->id); } while (0)
116 #endif
117
118 /* declare MumpsScalar */
119 #if defined(PETSC_USE_COMPLEX)
120 #if defined(PETSC_USE_REAL_SINGLE)
121 #define MumpsScalar mumps_complex
122 #else
123 #define MumpsScalar mumps_double_complex
124 #endif
125 #else
126 #define MumpsScalar PetscScalar
127 #endif
128
129 /* macros s.t. indices match MUMPS documentation */
130 #define ICNTL(I) icntl[(I)-1]
131 #define CNTL(I) cntl[(I)-1]
132 #define INFOG(I) infog[(I)-1]
133 #define INFO(I) info[(I)-1]
134 #define RINFOG(I) rinfog[(I)-1]
135 #define RINFO(I) rinfo[(I)-1]
136
/* Private context stored in Mat->data for MUMPS factored matrices; holds the MUMPS control
   structure (id) plus PETSc-side buffers for the triplet representation, RHS/solution scatters,
   Schur-complement storage and the optional OpenMP-support machinery. */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id; /* MUMPS control/data structure; variant matches PETSc's scalar precision */
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;               /* NOTE(review): presumably tracks pattern changes between factorizations -- confirm in the factor routines */
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;              /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;        /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                    /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;                    /* symmetry flag handed to MUMPS (id.sym) -- see MUMPS manual for values */
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;             /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;     /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;                /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;            /* which INFO to display */
  PetscInt       sizeredrhs;             /* current capacity (in entries) of id.redrhs */
  PetscScalar    *schur_sol;             /* scratch solution used when the Schur complement is stored inverted */
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc;    /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;      /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;               /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;    /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;             /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;          /* is this rank the master of omp_comm */
  MPI_Request    *reqs;
};
187
188 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
189 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
190 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    /* 64-bit PetscInt: downcast into persistent PetscMUMPSInt work arrays, grown on demand */
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++) {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt has the same size as PetscMUMPSInt: pass the arrays through unchanged */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
222
/* Free all Schur-complement related storage held by the MUMPS context and disable the
   Schur feature by resetting ICNTL(19) to 0. */
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr); /* list of Schur variables given to MUMPS */
  ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);        /* reduced right-hand-side buffer */
  ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);        /* scratch used by the inverted-Schur solve path */
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}
236
237 /* solve with rhs in mumps->id.redrhs and return in the same location */
/* solve with rhs in mumps->id.redrhs and return in the same location.
   Two paths depending on how the Schur complement S is stored:
   - MAT_FACTOR_SCHUR_FACTORED: triangular solves with S, in place on redrhs;
   - MAT_FACTOR_SCHUR_INVERTED: multiply by the stored inverse into a scratch buffer,
     then copy back into redrhs.
   ICNTL(9)==0 selects the transpose solve, matching the MUMPS convention. */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* wrap redrhs (size_schur x nrhs) as a dense matrix B, no copy */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X aliases the same redrhs buffer, so the solve lands in place */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    /* grow the persistent scratch buffer for the product if needed */
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* copy the product back into redrhs via B */
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
300
/* Drive the MUMPS Schur phases around a solve:
   expansion == PETSC_FALSE : set up the condensation phase (allocate redrhs, ICNTL(26)=1);
   expansion == PETSC_TRUE  : solve the Schur system ourselves, then run the MUMPS
                              expansion phase (ICNTL(26)=2) and restore defaults.
   No-op when no Schur complement was requested (ICNTL(19)==0). */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur; /* leading dimension of redrhs */
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
337
338 /*
339 MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
340
341 input:
342 A - matrix in aij,baij or sbaij format
343 shift - 0: C style output triple; 1: Fortran style output triple.
344 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
345 MAT_REUSE_MATRIX: only the values in v array are updated
346 output:
347 nnz - dim of r, c, and v (number of local nonzero entries of A)
348 r, c, v - row and col index, matrix values (matrix triples)
349
350 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
351 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
352 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
353
354 */
355
MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)356 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
357 {
358 const PetscScalar *av;
359 const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
360 PetscInt64 nz,rnz,i,j,k;
361 PetscErrorCode ierr;
362 PetscMUMPSInt *row,*col;
363 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
364
365 PetscFunctionBegin;
366 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
367 mumps->val = (PetscScalar*)av;
368 if (reuse == MAT_INITIAL_MATRIX) {
369 nz = aa->nz;
370 ai = aa->i;
371 aj = aa->j;
372 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
373 for (i=k=0; i<M; i++) {
374 rnz = ai[i+1] - ai[i];
375 ajj = aj + ai[i];
376 for (j=0; j<rnz; j++) {
377 ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
378 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
379 k++;
380 }
381 }
382 mumps->irn = row;
383 mumps->jcn = col;
384 mumps->nnz = nz;
385 }
386 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
387 PetscFunctionReturn(0);
388 }
389
/* Convert a SeqSELL (sliced ELLPACK) matrix to MUMPS triples; values are used in place. */
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val; /* SELL values are contiguous, MUMPS reads them directly */
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices]; /* total number of stored entries (slice padding included) */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<a->totalslices; i++) {
      /* entries of slice i belong to rows 8*i .. 8*i+7, cycling with period 8 (r = position mod 8) */
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
414
/* Convert a SeqBAIJ matrix to MUMPS triples by expanding every bs x bs block into point
   entries; values are used in place (aa->a). Indices are built only on MAT_INITIAL_MATRIX. */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  ierr       = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M          = A->rmap->N/bs; /* number of block rows */
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz; /* each stored block contributes bs2 point entries */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            /* m runs fastest so point entries are emitted in the same order as aa->a stores them */
            ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
451
MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)452 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
453 {
454 const PetscInt *ai, *aj,*ajj;
455 PetscInt bs;
456 PetscInt64 nz,rnz,i,j,k,m;
457 PetscErrorCode ierr;
458 PetscMUMPSInt *row,*col;
459 PetscScalar *val;
460 Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data;
461 const PetscInt bs2=aa->bs2,mbs=aa->mbs;
462 #if defined(PETSC_USE_COMPLEX)
463 PetscBool hermitian;
464 #endif
465
466 PetscFunctionBegin;
467 #if defined(PETSC_USE_COMPLEX)
468 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
469 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
470 #endif
471 ai = aa->i;
472 aj = aa->j;
473 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
474 if (reuse == MAT_INITIAL_MATRIX) {
475 nz = aa->nz;
476 ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
477 if (bs>1) {
478 ierr = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
479 mumps->val = mumps->val_alloc;
480 } else {
481 mumps->val = aa->a;
482 }
483 mumps->irn = row;
484 mumps->jcn = col;
485 } else {
486 if (bs == 1) mumps->val = aa->a;
487 row = mumps->irn;
488 col = mumps->jcn;
489 }
490 val = mumps->val;
491
492 nz = 0;
493 if (bs>1) {
494 for (i=0; i<mbs; i++) {
495 rnz = ai[i+1] - ai[i];
496 ajj = aj + ai[i];
497 for (j=0; j<rnz; j++) {
498 for (k=0; k<bs; k++) {
499 for (m=0; m<bs; m++) {
500 if (ajj[j]>i || k>=m) {
501 if (reuse == MAT_INITIAL_MATRIX) {
502 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
503 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
504 }
505 val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
506 }
507 }
508 }
509 }
510 }
511 } else if (reuse == MAT_INITIAL_MATRIX) {
512 for (i=0; i<mbs; i++) {
513 rnz = ai[i+1] - ai[i];
514 ajj = aj + ai[i];
515 for (j=0; j<rnz; j++) {
516 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
517 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
518 nz++;
519 }
520 }
521 if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
522 }
523 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
524 PetscFunctionReturn(0);
525 }
526
MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)527 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
528 {
529 const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n;
530 PetscInt64 nz,rnz,i,j;
531 const PetscScalar *av,*v1;
532 PetscScalar *val;
533 PetscErrorCode ierr;
534 PetscMUMPSInt *row,*col;
535 Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
536 PetscBool missing;
537 #if defined(PETSC_USE_COMPLEX)
538 PetscBool hermitian;
539 #endif
540
541 PetscFunctionBegin;
542 #if defined(PETSC_USE_COMPLEX)
543 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
544 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
545 #endif
546 ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
547 ai = aa->i; aj = aa->j;
548 adiag = aa->diag;
549 ierr = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
550 if (reuse == MAT_INITIAL_MATRIX) {
551 /* count nz in the upper triangular part of A */
552 nz = 0;
553 if (missing) {
554 for (i=0; i<M; i++) {
555 if (PetscUnlikely(adiag[i] >= ai[i+1])) {
556 for (j=ai[i];j<ai[i+1];j++) {
557 if (aj[j] < i) continue;
558 nz++;
559 }
560 } else {
561 nz += ai[i+1] - adiag[i];
562 }
563 }
564 } else {
565 for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
566 }
567 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
568 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
569 mumps->nnz = nz;
570 mumps->irn = row;
571 mumps->jcn = col;
572 mumps->val = mumps->val_alloc = val;
573
574 nz = 0;
575 if (missing) {
576 for (i=0; i<M; i++) {
577 if (PetscUnlikely(adiag[i] >= ai[i+1])) {
578 for (j=ai[i];j<ai[i+1];j++) {
579 if (aj[j] < i) continue;
580 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
581 ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
582 val[nz] = av[j];
583 nz++;
584 }
585 } else {
586 rnz = ai[i+1] - adiag[i];
587 ajj = aj + adiag[i];
588 v1 = av + adiag[i];
589 for (j=0; j<rnz; j++) {
590 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
591 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
592 val[nz++] = v1[j];
593 }
594 }
595 }
596 } else {
597 for (i=0; i<M; i++) {
598 rnz = ai[i+1] - adiag[i];
599 ajj = aj + adiag[i];
600 v1 = av + adiag[i];
601 for (j=0; j<rnz; j++) {
602 ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
603 ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
604 val[nz++] = v1[j];
605 }
606 }
607 }
608 } else {
609 nz = 0;
610 val = mumps->val;
611 if (missing) {
612 for (i=0; i <M; i++) {
613 if (PetscUnlikely(adiag[i] >= ai[i+1])) {
614 for (j=ai[i];j<ai[i+1];j++) {
615 if (aj[j] < i) continue;
616 val[nz++] = av[j];
617 }
618 } else {
619 rnz = ai[i+1] - adiag[i];
620 v1 = av + adiag[i];
621 for (j=0; j<rnz; j++) {
622 val[nz++] = v1[j];
623 }
624 }
625 }
626 } else {
627 for (i=0; i <M; i++) {
628 rnz = ai[i+1] - adiag[i];
629 v1 = av + adiag[i];
630 for (j=0; j<rnz; j++) {
631 val[nz++] = v1[j];
632 }
633 }
634 }
635 }
636 ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
637 PetscFunctionReturn(0);
638 }
639
MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)640 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
641 {
642 PetscErrorCode ierr;
643 const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
644 PetscInt bs;
645 PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB;
646 PetscMUMPSInt *row,*col;
647 const PetscScalar *av,*bv,*v1,*v2;
648 PetscScalar *val;
649 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
650 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data;
651 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
652 const PetscInt bs2=aa->bs2,mbs=aa->mbs;
653 #if defined(PETSC_USE_COMPLEX)
654 PetscBool hermitian;
655 #endif
656
657 PetscFunctionBegin;
658 #if defined(PETSC_USE_COMPLEX)
659 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
660 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
661 #endif
662 ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
663 rstart = A->rmap->rstart;
664 ai = aa->i;
665 aj = aa->j;
666 bi = bb->i;
667 bj = bb->j;
668 av = aa->a;
669 bv = bb->a;
670
671 garray = mat->garray;
672
673 if (reuse == MAT_INITIAL_MATRIX) {
674 nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
675 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
676 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
677 /* can not decide the exact mumps->nnz now because of the SBAIJ */
678 mumps->irn = row;
679 mumps->jcn = col;
680 mumps->val = mumps->val_alloc = val;
681 } else {
682 val = mumps->val;
683 }
684
685 jj = 0; irow = rstart;
686 for (i=0; i<mbs; i++) {
687 ajj = aj + ai[i]; /* ptr to the beginning of this row */
688 countA = ai[i+1] - ai[i];
689 countB = bi[i+1] - bi[i];
690 bjj = bj + bi[i];
691 v1 = av + ai[i]*bs2;
692 v2 = bv + bi[i]*bs2;
693
694 if (bs>1) {
695 /* A-part */
696 for (j=0; j<countA; j++) {
697 for (k=0; k<bs; k++) {
698 for (m=0; m<bs; m++) {
699 if (rstart + ajj[j]*bs>irow || k>=m) {
700 if (reuse == MAT_INITIAL_MATRIX) {
701 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
702 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
703 }
704 val[jj++] = v1[j*bs2 + m + k*bs];
705 }
706 }
707 }
708 }
709
710 /* B-part */
711 for (j=0; j < countB; j++) {
712 for (k=0; k<bs; k++) {
713 for (m=0; m<bs; m++) {
714 if (reuse == MAT_INITIAL_MATRIX) {
715 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
716 ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
717 }
718 val[jj++] = v2[j*bs2 + m + k*bs];
719 }
720 }
721 }
722 } else {
723 /* A-part */
724 for (j=0; j<countA; j++) {
725 if (reuse == MAT_INITIAL_MATRIX) {
726 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
727 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
728 }
729 val[jj++] = v1[j];
730 }
731
732 /* B-part */
733 for (j=0; j < countB; j++) {
734 if (reuse == MAT_INITIAL_MATRIX) {
735 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
736 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
737 }
738 val[jj++] = v2[j];
739 }
740 }
741 irow+=bs;
742 }
743 mumps->nnz = jj;
744 PetscFunctionReturn(0);
745 }
746
/* Convert an MPIAIJ matrix (diagonal part Ad + off-diagonal part Ao) to MUMPS triples with
   global indices. Indices are built on MAT_INITIAL_MATRIX; on reuse only values are refreshed. */
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart; /* first global row owned by this rank */

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val; /* pattern unchanged; only values are rewritten below */
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part: local column indices become global by adding rstart */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part: garray maps compressed off-diagonal column indices to global columns */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
817
/* Convert an MPIBAIJ matrix (diagonal part mat->A + off-diagonal part mat->B) to MUMPS
   triples with global point indices, expanding each bs x bs block into bs2 entries. */
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz); /* every stored block expands to bs2 point entries */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val; /* pattern unchanged; only values are rewritten below */
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part: local block columns become global by adding rstart */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part: garray maps compressed off-diagonal block columns to global block columns */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}
887
MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS * mumps)888 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
889 {
890 const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
891 PetscErrorCode ierr;
892 PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
893 PetscMUMPSInt *row,*col;
894 const PetscScalar *av, *bv,*v1,*v2;
895 PetscScalar *val;
896 Mat Ad,Ao;
897 Mat_SeqAIJ *aa;
898 Mat_SeqAIJ *bb;
899 #if defined(PETSC_USE_COMPLEX)
900 PetscBool hermitian;
901 #endif
902
903 PetscFunctionBegin;
904 #if defined(PETSC_USE_COMPLEX)
905 ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
906 if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
907 #endif
908 ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
909 ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
910 ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
911
912 aa = (Mat_SeqAIJ*)(Ad)->data;
913 bb = (Mat_SeqAIJ*)(Ao)->data;
914 ai = aa->i;
915 aj = aa->j;
916 adiag = aa->diag;
917 bi = bb->i;
918 bj = bb->j;
919
920 rstart = A->rmap->rstart;
921
922 if (reuse == MAT_INITIAL_MATRIX) {
923 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
924 nzb = 0; /* num of upper triangular entries in mat->B */
925 for (i=0; i<m; i++) {
926 nza += (ai[i+1] - adiag[i]);
927 countB = bi[i+1] - bi[i];
928 bjj = bj + bi[i];
929 for (j=0; j<countB; j++) {
930 if (garray[bjj[j]] > rstart) nzb++;
931 }
932 }
933
934 nz = nza + nzb; /* total nz of upper triangular part of mat */
935 ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
936 ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
937 mumps->nnz = nz;
938 mumps->irn = row;
939 mumps->jcn = col;
940 mumps->val = mumps->val_alloc = val;
941 } else {
942 val = mumps->val;
943 }
944
945 jj = 0; irow = rstart;
946 for (i=0; i<m; i++) {
947 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
948 v1 = av + adiag[i];
949 countA = ai[i+1] - adiag[i];
950 countB = bi[i+1] - bi[i];
951 bjj = bj + bi[i];
952 v2 = bv + bi[i];
953
954 /* A-part */
955 for (j=0; j<countA; j++) {
956 if (reuse == MAT_INITIAL_MATRIX) {
957 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
958 ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
959 }
960 val[jj++] = v1[j];
961 }
962
963 /* B-part */
964 for (j=0; j < countB; j++) {
965 if (garray[bjj[j]] > rstart) {
966 if (reuse == MAT_INITIAL_MATRIX) {
967 ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
968 ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
969 }
970 val[jj++] = v2[j];
971 }
972 }
973 irow++;
974 }
975 ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
976 ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
977 PetscFunctionReturn(0);
978 }
979
/*
  MatDestroy_MUMPS - Destroy a MUMPS factored matrix: release all PETSc-side
  buffers and scatters, terminate the MUMPS instance (JOB_END), and detach the
  composed functions installed on A.

  Note: the statement order matters — PETSc-owned arrays that were handed to
  MUMPS are freed first, the MUMPS instance is ended, and the MPI communicator
  and A->data are released last.
*/
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  /* Solution/rhs scratch space and scatters used by the solve routines */
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  /* COO triples of the matrix that were passed to MUMPS */
  ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  /* Tell MUMPS to release its internal data for this instance */
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    /* Buffers used to gather distributed rhs onto the omp master rank */
    ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
    ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr);
  }
#endif
  ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
  ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr);
  if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = MPI_Comm_free(&mumps->mumps_comm);CHKERRQ(ierr);}
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1030
/* Set up the distributed RHS info for MUMPS (dense distributed rhs, ICNTL(20)=10).
   <nrhs> is the number of RHS; <array> points to start of RHS on the local processor.
   Fills mumps->id.{nrhs,nloc_rhs,lrhs_loc,irhs_loc,rhs_loc}. With PETSc's OpenMP
   support (omp_comm_size > 1), the rhs pieces of all ranks in an omp_comm are first
   gathered onto the omp master rank with MPI_Gatherv. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  PetscErrorCode    ierr;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt ompsize=mumps->omp_comm_size;
  PetscInt          i,m,M,rstart;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  /* Row indices below must fit in PetscMUMPSInt (32-bit) */
  if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      /* Lazily build the list of (1-based) global row indices owned by this rank */
      mumps->nloc_rhs = m;
      ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array; /* hand the caller's array directly to MUMPS */
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt  *ranges;
    PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group       petsc_group,omp_group;
    PetscScalar     *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
        ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
        /* Map omp_comm ranks to petsc_comm ranks so we can look up their ownership ranges */
        ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRQ(ierr);
        ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRQ(ierr);

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
        ierr = MPI_Group_free(&petsc_group);CHKERRQ(ierr);
        ierr = MPI_Group_free(&omp_group);CHKERRQ(ierr);
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
        ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
    ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRQ(ierr);

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          /* Gathered data is grouped by sender; interleave it back into column-major layout */
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}
1129
/*
  MatSolve_MUMPS - Solve A x = b using an already-computed MUMPS factorization (JOB_SOLVE).

  Uniprocessor: b is copied into x and MUMPS is given x's array as the centralized rhs,
  which it overwrites with the solution.
  Parallel: the rhs is either handed to MUMPS as a dense distributed rhs (ICNTL(20)=10)
  or scattered to a sequential vector on rank 0 (ICNTL(20)=0); the distributed solution
  (x_seq/isol_loc) is then scattered back into x. When Schur indices were provided, an
  extra condensation/expansion pair of calls brackets the solve.
*/
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar *rarray = NULL;
  PetscScalar       *array;
  IS                is_iden,is_petsc;
  PetscErrorCode    ierr;
  PetscInt          i;
  PetscBool         second_solve = PETSC_FALSE;
  static PetscBool  cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* Factorization failed earlier: mark the solution invalid instead of calling MUMPS */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector */
      ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      if (!mumps->myid) {
        /* Only the host rank passes the centralized rhs to MUMPS */
        ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else { /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    ierr = VecCopy(b,x);CHKERRQ(ierr); /* solve in place on x so b is left untouched */
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr); /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  /* Release whichever rhs array was acquired above */
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
    } else if (!mumps->myid) {
      ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
    }
  } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1228
/*
  MatSolveTranspose_MUMPS - Solve A^T x = b by temporarily toggling MUMPS ICNTL(9),
  which selects whether the system or its transpose is solved, then delegating to
  MatSolve_MUMPS().
*/
PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0; /* 0: solve the transposed system */
  ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
  mumps->id.ICNTL(9) = 1; /* restore the default: solve with A itself */
  PetscFunctionReturn(0);
}
1240
/*
  MatMatSolve_MUMPS - Solve A X = B for multiple right-hand sides with a MUMPS factorization.

  B may be dense (MATSEQDENSE/MATMPIDENSE) or a MATTRANSPOSEMAT wrapping an AIJ matrix
  (MUMPS wants sparse rhs in compressed-column form; see MatMatTransposeSolve_MUMPS()).
  X must be dense. Sequentially, MUMPS solves in place on X's array; in parallel, rhs is
  centralized on the host (or distributed when ICNTL20==10) and the distributed solution
  (sol_loc/isol_loc) is scattered back into X's array through a temporary MPI vector.
*/
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actural RHS matrix,
                   because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;  /* leading dimension of the centralized rhs */
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    /* Sequential path: MUMPS solves in place on X's array; note these locals
       intentionally shadow the function-scope aa/spnr/ia/ja used by the parallel path */
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
        ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++){
          for (j=0; j<nrhs; j++){
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
      } else {
        /* Non-host ranks contribute nothing to the centralized rhs */
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
      }
      ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = ISDestroy(&is_to);CHKERRQ(ierr);
      ierr = ISDestroy(&is_from);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      if (!mumps->myid) { /* define rhs on the host */
        ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)bray;
        ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      /* Host passes the sparse rhs (diagonal block of Bt, in CSR treated as CSC of B) to MUMPS */
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* find which rank of X owns global row isol_loc[i] */
    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;  /* restore the buffers MatSolve() uses */
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
    }
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1488
MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)1489 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1490 {
1491 PetscErrorCode ierr;
1492 PetscBool flg;
1493 Mat B;
1494
1495 PetscFunctionBegin;
1496 ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
1497 if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1498
1499 /* Create B=Bt^T that uses Bt's data structure */
1500 ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
1501
1502 ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
1503 ierr = MatDestroy(&B);CHKERRQ(ierr);
1504 PetscFunctionReturn(0);
1505 }
1506
1507 #if !defined(PETSC_USE_COMPLEX)
1508 /*
1509 input:
1510 F: numeric factor
1511 output:
1512 nneg: total number of negative pivots
1513 nzero: total number of zero pivots
1514 npos: (global dimension of F) - nneg - nzero
1515 */
MatGetInertia_SBAIJMUMPS(Mat F,PetscInt * nneg,PetscInt * nzero,PetscInt * npos)1516 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1517 {
1518 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
1519 PetscErrorCode ierr;
1520 PetscMPIInt size;
1521
1522 PetscFunctionBegin;
1523 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
1524 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1525 if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13));
1526
1527 if (nneg) *nneg = mumps->id.INFOG(12);
1528 if (nzero || npos) {
1529 if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1530 if (nzero) *nzero = mumps->id.INFOG(28);
1531 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1532 }
1533 PetscFunctionReturn(0);
1534 }
1535 #endif
1536
MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS * mumps)1537 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1538 {
1539 PetscErrorCode ierr;
1540 PetscInt i,nreqs;
1541 PetscMUMPSInt *irn,*jcn;
1542 PetscMPIInt count;
1543 PetscInt64 totnnz,remain;
1544 const PetscInt osize=mumps->omp_comm_size;
1545 PetscScalar *val;
1546
1547 PetscFunctionBegin;
1548 if (osize > 1) {
1549 if (reuse == MAT_INITIAL_MATRIX) {
1550 /* master first gathers counts of nonzeros to receive */
1551 if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
1552 ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr);
1553
1554 /* Then each computes number of send/recvs */
1555 if (mumps->is_omp_master) {
1556 /* Start from 1 since self communication is not done in MPI */
1557 nreqs = 0;
1558 for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1559 } else {
1560 nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1561 }
1562 ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */
1563
1564 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1565 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
1566 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1567 is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1568 */
1569 nreqs = 0; /* counter for actual send/recvs */
1570 if (mumps->is_omp_master) {
1571 for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1572 ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
1573 ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);
1574
1575 /* Self communication */
1576 ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
1577 ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
1578 ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);
1579
1580 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1581 ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
1582 ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
1583 mumps->nnz = totnnz;
1584 mumps->irn = irn;
1585 mumps->jcn = jcn;
1586 mumps->val = mumps->val_alloc = val;
1587
1588 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1589 jcn += mumps->recvcount[0];
1590 val += mumps->recvcount[0];
1591
1592 /* Remote communication */
1593 for (i=1; i<osize; i++) {
1594 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1595 remain = mumps->recvcount[i] - count;
1596 while (count>0) {
1597 ierr = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1598 ierr = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1599 ierr = MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1600 irn += count;
1601 jcn += count;
1602 val += count;
1603 count = PetscMin(remain,PETSC_MPI_INT_MAX);
1604 remain -= count;
1605 }
1606 }
1607 } else {
1608 irn = mumps->irn;
1609 jcn = mumps->jcn;
1610 val = mumps->val;
1611 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1612 remain = mumps->nnz - count;
1613 while (count>0) {
1614 ierr = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1615 ierr = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1616 ierr = MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1617 irn += count;
1618 jcn += count;
1619 val += count;
1620 count = PetscMin(remain,PETSC_MPI_INT_MAX);
1621 remain -= count;
1622 }
1623 }
1624 } else {
1625 nreqs = 0;
1626 if (mumps->is_omp_master) {
1627 val = mumps->val + mumps->recvcount[0];
1628 for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1629 count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1630 remain = mumps->recvcount[i] - count;
1631 while (count>0) {
1632 ierr = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1633 val += count;
1634 count = PetscMin(remain,PETSC_MPI_INT_MAX);
1635 remain -= count;
1636 }
1637 }
1638 } else {
1639 val = mumps->val;
1640 count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1641 remain = mumps->nnz - count;
1642 while (count>0) {
1643 ierr = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
1644 val += count;
1645 count = PetscMin(remain,PETSC_MPI_INT_MAX);
1646 remain -= count;
1647 }
1648 }
1649 }
1650 ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
1651 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
1652 }
1653 PetscFunctionReturn(0);
1654 }
1655
MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo * info)1656 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1657 {
1658 Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data;
1659 PetscErrorCode ierr;
1660 PetscBool isMPIAIJ;
1661
1662 PetscFunctionBegin;
1663 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1664 if (mumps->id.INFOG(1) == -6) {
1665 ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1666 }
1667 ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1668 PetscFunctionReturn(0);
1669 }
1670
1671 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
1672 ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);
1673
1674 /* numerical factorization phase */
1675 /*-------------------------------*/
1676 mumps->id.job = JOB_FACTNUMERIC;
1677 if (!mumps->id.ICNTL(18)) { /* A is centralized */
1678 if (!mumps->myid) {
1679 mumps->id.a = (MumpsScalar*)mumps->val;
1680 }
1681 } else {
1682 mumps->id.a_loc = (MumpsScalar*)mumps->val;
1683 }
1684 PetscMUMPS_c(mumps);
1685 if (mumps->id.INFOG(1) < 0) {
1686 if (A->erroriffailure) {
1687 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1688 } else {
1689 if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1690 ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1691 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1692 } else if (mumps->id.INFOG(1) == -13) {
1693 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1694 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1695 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1696 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1697 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1698 } else {
1699 ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1700 F->factorerrortype = MAT_FACTOR_OTHER;
1701 }
1702 }
1703 }
1704 if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16));
1705
1706 F->assembled = PETSC_TRUE;
1707 mumps->matstruc = SAME_NONZERO_PATTERN;
1708 if (F->schur) { /* reset Schur status to unfactored */
1709 #if defined(PETSC_HAVE_CUDA)
1710 F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1711 #endif
1712 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1713 mumps->id.ICNTL(19) = 2;
1714 ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
1715 }
1716 ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
1717 }
1718
1719 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1720 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1721
1722 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1723 if (mumps->petsc_size > 1) {
1724 PetscInt lsol_loc;
1725 PetscScalar *sol_loc;
1726
1727 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);
1728
1729 /* distributed solution; Create x_seq=sol_loc for repeated use */
1730 if (mumps->x_seq) {
1731 ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1732 ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
1733 ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
1734 }
1735 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1736 ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
1737 mumps->id.lsol_loc = lsol_loc;
1738 mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1739 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
1740 }
1741 ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
1742 PetscFunctionReturn(0);
1743 }
1744
1745 /* Sets MUMPS options from the options database */
PetscSetMUMPSFromOptions(Mat F,Mat A)1746 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1747 {
1748 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1749 PetscErrorCode ierr;
1750 PetscMUMPSInt icntl=0;
1751 PetscInt info[80],i,ninfo=80;
1752 PetscBool flg=PETSC_FALSE;
1753
1754 PetscFunctionBegin;
1755 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
1756 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
1757 if (flg) mumps->id.ICNTL(1) = icntl;
1758 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
1759 if (flg) mumps->id.ICNTL(2) = icntl;
1760 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
1761 if (flg) mumps->id.ICNTL(3) = icntl;
1762
1763 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
1764 if (flg) mumps->id.ICNTL(4) = icntl;
1765 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
1766
1767 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
1768 if (flg) mumps->id.ICNTL(6) = icntl;
1769
1770 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=Petsc (sequential only), 3=Scotch, 4=PORD, 5=Metis, 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
1771 if (flg) {
1772 if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not supported by the PETSc/MUMPS interface for parallel matrices\n");
1773 else mumps->id.ICNTL(7) = icntl;
1774 }
1775
1776 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
1777 /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */
1778 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
1779 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
1780 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
1781 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
1782 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
1783 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
1784 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1785 ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
1786 ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
1787 }
1788
1789 /* MPICH Fortran MPI_IN_PLACE binding has a bug that prevents the use of 'mpi4py + mpich + mumps', e.g., by Firedrake.
1790 So we turn off distributed RHS for MPICH. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1791 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1792 */
1793 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) && defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
1794 mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1795 #else
1796 mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1797 #endif
1798 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr);
1799 if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10\n",(int)mumps->ICNTL20);
1800 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1801 if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n");
1802 #endif
1803 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */
1804
1805 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
1806 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
1807 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
1808 if (mumps->id.ICNTL(24)) {
1809 mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1810 }
1811
1812 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
1813 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
1814 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
1815 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
1816 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
1817 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
1818 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
1819 /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr); -- not supported by PETSc API */
1820 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
1821 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
1822 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
1823 ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
1824
1825 ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
1826 ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
1827 ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
1828 ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
1829 ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
1830 ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);
1831
1832 ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr);
1833
1834 ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
1835 if (ninfo) {
1836 if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo);
1837 ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
1838 mumps->ninfo = ninfo;
1839 for (i=0; i<ninfo; i++) {
1840 if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo);
1841 else mumps->info[i] = info[i];
1842 }
1843 }
1844
1845 ierr = PetscOptionsEnd();CHKERRQ(ierr);
1846 PetscFunctionReturn(0);
1847 }
1848
/* One-time initialization of a MUMPS instance for matrix A.

   Sets up the PETSc/OMP/MUMPS communicators (optionally via PetscOmpCtrl when
   -mat_mumps_use_omp_threads is given), calls MUMPS with JOB_INIT, broadcasts the
   MUMPS default control parameters to all ranks, and applies the PETSc-side default
   ICNTL overrides. The exact call order matters (e.g. comm setup must precede
   MPI_Comm_c2f and JOB_INIT); do not reorder.

   Input Parameters:
.  A     - the matrix to be factored (supplies the communicator)
.  mumps - the Mat_MUMPS context to initialize
*/
PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;
  MPI_Comm       newcomm=MPI_COMM_NULL;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */

  ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
  ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* split petsc_comm: a small mumps_comm of "master" ranks drives MUMPS, the rest donate their cores as OpenMP threads */
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n");
#endif
  } else {
    /* no OpenMP support requested: every rank is its own omp "group" and MUMPS runs on the full PETSc communicator */
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr);
  mumps->reqs = NULL;
  mumps->tag  = 0;

  /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
  if (mumps->mumps_comm != MPI_COMM_NULL) {
    ierr = MPI_Comm_dup(mumps->mumps_comm,&newcomm);CHKERRQ(ierr);
    mumps->mumps_comm = newcomm;
  }

  /* MUMPS takes a Fortran communicator handle */
  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1;  /* host participates factorization and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
   */
  ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr);

  mumps->scat_rhs = NULL;
  mumps->scat_sol = NULL;

  /* set PETSc-MUMPS default options - override MUMPS default */
  mumps->id.ICNTL(3) = 0;  /* suppress MUMPS diagnostic output by default */
  mumps->id.ICNTL(4) = 0;
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
    mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
  } else {
    mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
    mumps->id.ICNTL(21) = 1;   /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur    = 0;
  mumps->id.listvar_schur = NULL;
  mumps->id.schur         = NULL;
  mumps->sizeredrhs       = 0;
  mumps->schur_sol        = NULL;
  mumps->schur_sizesol    = 0;
  PetscFunctionReturn(0);
}
1922
MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo * info,Mat_MUMPS * mumps)1923 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1924 {
1925 PetscErrorCode ierr;
1926
1927 PetscFunctionBegin;
1928 if (mumps->id.INFOG(1) < 0) {
1929 if (A->erroriffailure) {
1930 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1931 } else {
1932 if (mumps->id.INFOG(1) == -6) {
1933 ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1934 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1935 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1936 ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1937 F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1938 } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
1939 ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
1940 } else {
1941 ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1942 F->factorerrortype = MAT_FACTOR_OTHER;
1943 }
1944 }
1945 }
1946 PetscFunctionReturn(0);
1947 }
1948
1949 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)1950 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1951 {
1952 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1953 PetscErrorCode ierr;
1954 Vec b;
1955 const PetscInt M = A->rmap->N;
1956
1957 PetscFunctionBegin;
1958 mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
1959
1960 /* Set MUMPS options from the options database */
1961 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
1962
1963 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
1964 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
1965
1966 /* analysis phase */
1967 /*----------------*/
1968 mumps->id.job = JOB_FACTSYMBOLIC;
1969 mumps->id.n = M;
1970 switch (mumps->id.ICNTL(18)) {
1971 case 0: /* centralized assembled matrix input */
1972 if (!mumps->myid) {
1973 mumps->id.nnz = mumps->nnz;
1974 mumps->id.irn = mumps->irn;
1975 mumps->id.jcn = mumps->jcn;
1976 if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1977 if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */
1978 if (!mumps->myid) {
1979 const PetscInt *idx;
1980 PetscInt i;
1981
1982 ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr);
1983 ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);
1984 for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */
1985 ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
1986 }
1987 }
1988 }
1989 break;
1990 case 3: /* distributed assembled matrix input (size>1) */
1991 mumps->id.nnz_loc = mumps->nnz;
1992 mumps->id.irn_loc = mumps->irn;
1993 mumps->id.jcn_loc = mumps->jcn;
1994 if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1995 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1996 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
1997 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
1998 ierr = VecDestroy(&b);CHKERRQ(ierr);
1999 }
2000 break;
2001 }
2002 PetscMUMPS_c(mumps);
2003 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2004
2005 F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2006 F->ops->solve = MatSolve_MUMPS;
2007 F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2008 F->ops->matsolve = MatMatSolve_MUMPS;
2009 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2010 PetscFunctionReturn(0);
2011 }
2012
2013 /* Note the Petsc r and c permutations are ignored */
MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)2014 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2015 {
2016 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
2017 PetscErrorCode ierr;
2018 Vec b;
2019 const PetscInt M = A->rmap->N;
2020
2021 PetscFunctionBegin;
2022 mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2023
2024 /* Set MUMPS options from the options database */
2025 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2026
2027 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2028 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2029
2030 /* analysis phase */
2031 /*----------------*/
2032 mumps->id.job = JOB_FACTSYMBOLIC;
2033 mumps->id.n = M;
2034 switch (mumps->id.ICNTL(18)) {
2035 case 0: /* centralized assembled matrix input */
2036 if (!mumps->myid) {
2037 mumps->id.nnz = mumps->nnz;
2038 mumps->id.irn = mumps->irn;
2039 mumps->id.jcn = mumps->jcn;
2040 if (mumps->id.ICNTL(6)>1) {
2041 mumps->id.a = (MumpsScalar*)mumps->val;
2042 }
2043 }
2044 break;
2045 case 3: /* distributed assembled matrix input (size>1) */
2046 mumps->id.nnz_loc = mumps->nnz;
2047 mumps->id.irn_loc = mumps->irn;
2048 mumps->id.jcn_loc = mumps->jcn;
2049 if (mumps->id.ICNTL(6)>1) {
2050 mumps->id.a_loc = (MumpsScalar*)mumps->val;
2051 }
2052 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2053 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2054 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2055 ierr = VecDestroy(&b);CHKERRQ(ierr);
2056 }
2057 break;
2058 }
2059 PetscMUMPS_c(mumps);
2060 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2061
2062 F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2063 F->ops->solve = MatSolve_MUMPS;
2064 F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2065 PetscFunctionReturn(0);
2066 }
2067
2068 /* Note the Petsc r permutation and factor info are ignored */
MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo * info)2069 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2070 {
2071 Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
2072 PetscErrorCode ierr;
2073 Vec b;
2074 const PetscInt M = A->rmap->N;
2075
2076 PetscFunctionBegin;
2077 mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2078
2079 /* Set MUMPS options from the options database */
2080 ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2081
2082 ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2083 ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2084
2085 /* analysis phase */
2086 /*----------------*/
2087 mumps->id.job = JOB_FACTSYMBOLIC;
2088 mumps->id.n = M;
2089 switch (mumps->id.ICNTL(18)) {
2090 case 0: /* centralized assembled matrix input */
2091 if (!mumps->myid) {
2092 mumps->id.nnz = mumps->nnz;
2093 mumps->id.irn = mumps->irn;
2094 mumps->id.jcn = mumps->jcn;
2095 if (mumps->id.ICNTL(6)>1) {
2096 mumps->id.a = (MumpsScalar*)mumps->val;
2097 }
2098 }
2099 break;
2100 case 3: /* distributed assembled matrix input (size>1) */
2101 mumps->id.nnz_loc = mumps->nnz;
2102 mumps->id.irn_loc = mumps->irn;
2103 mumps->id.jcn_loc = mumps->jcn;
2104 if (mumps->id.ICNTL(6)>1) {
2105 mumps->id.a_loc = (MumpsScalar*)mumps->val;
2106 }
2107 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2108 ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2109 ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2110 ierr = VecDestroy(&b);CHKERRQ(ierr);
2111 }
2112 break;
2113 }
2114 PetscMUMPS_c(mumps);
2115 ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2116
2117 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2118 F->ops->solve = MatSolve_MUMPS;
2119 F->ops->solvetranspose = MatSolve_MUMPS;
2120 F->ops->matsolve = MatMatSolve_MUMPS;
2121 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2122 #if defined(PETSC_USE_COMPLEX)
2123 F->ops->getinertia = NULL;
2124 #else
2125 F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2126 #endif
2127 PetscFunctionReturn(0);
2128 }
2129
MatView_MUMPS(Mat A,PetscViewer viewer)2130 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2131 {
2132 PetscErrorCode ierr;
2133 PetscBool iascii;
2134 PetscViewerFormat format;
2135 Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
2136
2137 PetscFunctionBegin;
2138 /* check if matrix is mumps type */
2139 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2140
2141 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
2142 if (iascii) {
2143 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
2144 if (format == PETSC_VIEWER_ASCII_INFO) {
2145 ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
2146 ierr = PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);CHKERRQ(ierr);
2147 ierr = PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);CHKERRQ(ierr);
2148 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr);
2149 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr);
2150 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr);
2151 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr);
2152 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr);
2153 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr);
2154 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr);
2155 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr);
2156 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr);
2157 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr);
2158 if (mumps->id.ICNTL(11)>0) {
2159 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
2160 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
2161 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
2162 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
2163 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
2164 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
2165 }
2166 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr);
2167 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr);
2168 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr);
2169 /* ICNTL(15-17) not used */
2170 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr);
2171 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr);
2172 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr);
2173 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr);
2174 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr);
2175 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr);
2176
2177 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr);
2178 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr);
2179 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr);
2180 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr);
2181 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr);
2182 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr);
2183
2184 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr);
2185 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors is discarded in the solve phase): %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr);
2186 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr);
2187 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr);
2188 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr);
2189 ierr = PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr);
2190
2191 ierr = PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
2192 ierr = PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
2193 ierr = PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
2194 ierr = PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
2195 ierr = PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
2196 ierr = PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);
2197
2198 /* infomation local to each processor */
2199 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
2200 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
2201 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
2202 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2203 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
2204 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
2205 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2206 ierr = PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
2207 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
2208 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2209
2210 ierr = PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
2211 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
2212 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2213
2214 ierr = PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
2215 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
2216 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2217
2218 ierr = PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
2219 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
2220 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2221
2222 if (mumps->ninfo && mumps->ninfo <= 80){
2223 PetscInt i;
2224 for (i=0; i<mumps->ninfo; i++){
2225 ierr = PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
2226 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
2227 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2228 }
2229 }
2230 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
2231
2232 if (!mumps->myid) { /* information from the host */
2233 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
2234 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
2235 ierr = PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
2236 ierr = PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);
2237
2238 ierr = PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr);
2239 ierr = PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr);
2240 ierr = PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr);
2241 ierr = PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr);
2242 ierr = PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr);
2243 ierr = PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr);
2244 ierr = PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr);
2245 ierr = PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr);
2246 ierr = PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr);
2247 ierr = PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr);
2248 ierr = PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr);
2249 ierr = PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr);
2250 ierr = PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr);
2251 ierr = PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr);
2252 ierr = PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr);
2253 ierr = PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr);
2254 ierr = PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr);
2255 ierr = PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr);
2256 ierr = PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr);
2257 ierr = PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr);
2258 ierr = PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr);
2259 ierr = PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr);
2260 ierr = PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr);
2261 ierr = PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
2262 ierr = PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
2263 ierr = PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
2264 ierr = PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
2265 ierr = PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
2266 ierr = PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
2267 ierr = PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
2268 ierr = PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr);
2269 ierr = PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr);
2270 ierr = PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr);
2271 ierr = PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr);
2272 }
2273 }
2274 }
2275 PetscFunctionReturn(0);
2276 }
2277
MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo * info)2278 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2279 {
2280 Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2281
2282 PetscFunctionBegin;
2283 info->block_size = 1.0;
2284 info->nz_allocated = mumps->id.INFOG(20);
2285 info->nz_used = mumps->id.INFOG(20);
2286 info->nz_unneeded = 0.0;
2287 info->assemblies = 0.0;
2288 info->mallocs = 0.0;
2289 info->memory = 0.0;
2290 info->fill_ratio_given = 0;
2291 info->fill_ratio_needed = 0;
2292 info->factor_mallocs = 0;
2293 PetscFunctionReturn(0);
2294 }
2295
2296 /* -------------------------------------------------------------------------------------------*/
/*
   Register with MUMPS the set of variables (given by the index set is) that
   defines the Schur complement block. Allocates F->schur (sequential dense)
   as the buffer MUMPS writes the Schur complement into, converts the indices
   to 1-based MUMPS form, and selects the return format via ICNTL(19).
*/
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt *idxs;
  PetscInt size,i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    /* In parallel only rank 0 may own Schur indices: every other rank must pass an empty IS */
    ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
  }

  /* Schur complement matrix: MUMPS writes directly into the dense array of F->schur */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  /* NOTE(review): the pointer is kept in mumps->id.schur after the Restore below;
     this relies on the dense array staying valid for the lifetime of F->schur */
  mumps->id.schur = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld = size; /* leading dimension equals the Schur size (square, no padding) */
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    /* sym==1 is the MUMPS SPD mode; propagate that property to the Schur complement */
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  /* +1: convert 0-based PETSc indices to 1-based, with overflow-checked narrowing */
  for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}
2346
2347 /* -------------------------------------------------------------------------------------------*/
/*
   Copy the Schur complement computed by MUMPS (held in mumps->id.schur, whose
   layout depends on ICNTL(19) and on the symmetry mode) into a newly created
   sequential dense matrix *S in PETSc's column-major storage.  In complex
   builds the MumpsScalar {r,i} pairs are reassembled into PetscScalar.
*/
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat St;
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
  PetscScalar *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit, used to rebuild complex values */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      /* transpose on the fly: MUMPS row-major -> PETSc column-major */
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val;
        }
      }
    } else { /* stored by columns */
      /* same layout as PETSc dense: straight copy */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i,j,N=mumps->id.size_schur;
      /* mirror the stored triangle so St is full symmetric */
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      /* mirror the stored triangle so St is full symmetric */
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S = St; /* ownership of St transfers to the caller */
  PetscFunctionReturn(0);
}
2416
2417 /* -------------------------------------------------------------------------------------------*/
MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)2418 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2419 {
2420 PetscErrorCode ierr;
2421 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2422
2423 PetscFunctionBegin;
2424 ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
2425 PetscFunctionReturn(0);
2426 }
2427
MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt * ival)2428 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2429 {
2430 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2431
2432 PetscFunctionBegin;
2433 *ival = mumps->id.ICNTL(icntl);
2434 PetscFunctionReturn(0);
2435 }
2436
2437 /*@
2438 MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2439
2440 Logically Collective on Mat
2441
2442 Input Parameters:
2443 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2444 . icntl - index of MUMPS parameter array ICNTL()
2445 - ival - value of MUMPS ICNTL(icntl)
2446
2447 Options Database:
2448 . -mat_mumps_icntl_<icntl> <ival>
2449
2450 Level: beginner
2451
2452 References:
2453 . MUMPS Users' Guide
2454
2455 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2456 @*/
MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)2457 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2458 {
2459 PetscErrorCode ierr;
2460
2461 PetscFunctionBegin;
2462 PetscValidType(F,1);
2463 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2464 PetscValidLogicalCollectiveInt(F,icntl,2);
2465 PetscValidLogicalCollectiveInt(F,ival,3);
2466 ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
2467 PetscFunctionReturn(0);
2468 }
2469
2470 /*@
2471 MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2472
2473 Logically Collective on Mat
2474
2475 Input Parameters:
2476 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2477 - icntl - index of MUMPS parameter array ICNTL()
2478
2479 Output Parameter:
2480 . ival - value of MUMPS ICNTL(icntl)
2481
2482 Level: beginner
2483
2484 References:
2485 . MUMPS Users' Guide
2486
2487 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2488 @*/
MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt * ival)2489 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2490 {
2491 PetscErrorCode ierr;
2492
2493 PetscFunctionBegin;
2494 PetscValidType(F,1);
2495 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2496 PetscValidLogicalCollectiveInt(F,icntl,2);
2497 PetscValidIntPointer(ival,3);
2498 ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2499 PetscFunctionReturn(0);
2500 }
2501
2502 /* -------------------------------------------------------------------------------------------*/
MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)2503 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2504 {
2505 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2506
2507 PetscFunctionBegin;
2508 mumps->id.CNTL(icntl) = val;
2509 PetscFunctionReturn(0);
2510 }
2511
MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal * val)2512 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2513 {
2514 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2515
2516 PetscFunctionBegin;
2517 *val = mumps->id.CNTL(icntl);
2518 PetscFunctionReturn(0);
2519 }
2520
2521 /*@
2522 MatMumpsSetCntl - Set MUMPS parameter CNTL()
2523
2524 Logically Collective on Mat
2525
2526 Input Parameters:
2527 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2528 . icntl - index of MUMPS parameter array CNTL()
2529 - val - value of MUMPS CNTL(icntl)
2530
2531 Options Database:
2532 . -mat_mumps_cntl_<icntl> <val>
2533
2534 Level: beginner
2535
2536 References:
2537 . MUMPS Users' Guide
2538
2539 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2540 @*/
MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)2541 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2542 {
2543 PetscErrorCode ierr;
2544
2545 PetscFunctionBegin;
2546 PetscValidType(F,1);
2547 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2548 PetscValidLogicalCollectiveInt(F,icntl,2);
2549 PetscValidLogicalCollectiveReal(F,val,3);
2550 ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
2551 PetscFunctionReturn(0);
2552 }
2553
2554 /*@
2555 MatMumpsGetCntl - Get MUMPS parameter CNTL()
2556
2557 Logically Collective on Mat
2558
2559 Input Parameters:
2560 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2561 - icntl - index of MUMPS parameter array CNTL()
2562
2563 Output Parameter:
2564 . val - value of MUMPS CNTL(icntl)
2565
2566 Level: beginner
2567
2568 References:
2569 . MUMPS Users' Guide
2570
2571 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2572 @*/
MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal * val)2573 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2574 {
2575 PetscErrorCode ierr;
2576
2577 PetscFunctionBegin;
2578 PetscValidType(F,1);
2579 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2580 PetscValidLogicalCollectiveInt(F,icntl,2);
2581 PetscValidRealPointer(val,3);
2582 ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2583 PetscFunctionReturn(0);
2584 }
2585
MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt * info)2586 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2587 {
2588 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2589
2590 PetscFunctionBegin;
2591 *info = mumps->id.INFO(icntl);
2592 PetscFunctionReturn(0);
2593 }
2594
MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt * infog)2595 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2596 {
2597 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2598
2599 PetscFunctionBegin;
2600 *infog = mumps->id.INFOG(icntl);
2601 PetscFunctionReturn(0);
2602 }
2603
MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal * rinfo)2604 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2605 {
2606 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2607
2608 PetscFunctionBegin;
2609 *rinfo = mumps->id.RINFO(icntl);
2610 PetscFunctionReturn(0);
2611 }
2612
MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal * rinfog)2613 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2614 {
2615 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2616
2617 PetscFunctionBegin;
2618 *rinfog = mumps->id.RINFOG(icntl);
2619 PetscFunctionReturn(0);
2620 }
2621
MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)2622 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2623 {
2624 PetscErrorCode ierr;
2625 Mat Bt = NULL,Btseq = NULL;
2626 PetscBool flg;
2627 Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2628 PetscScalar *aa;
2629 PetscInt spnr,*ia,*ja,M,nrhs;
2630
2631 PetscFunctionBegin;
2632 PetscValidIntPointer(spRHS,2);
2633 ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
2634 if (flg) {
2635 ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
2636 } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix");
2637
2638 ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);
2639
2640 if (mumps->petsc_size > 1) {
2641 Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2642 Btseq = b->A;
2643 } else {
2644 Btseq = Bt;
2645 }
2646
2647 ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
2648 mumps->id.nrhs = nrhs;
2649 mumps->id.lrhs = M;
2650 mumps->id.rhs = NULL;
2651
2652 if (!mumps->myid) {
2653 ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
2654 ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2655 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2656 ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
2657 mumps->id.rhs_sparse = (MumpsScalar*)aa;
2658 } else {
2659 mumps->id.irhs_ptr = NULL;
2660 mumps->id.irhs_sparse = NULL;
2661 mumps->id.nz_rhs = 0;
2662 mumps->id.rhs_sparse = NULL;
2663 }
2664 mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2665 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2666
2667 /* solve phase */
2668 /*-------------*/
2669 mumps->id.job = JOB_SOLVE;
2670 PetscMUMPS_c(mumps);
2671 if (mumps->id.INFOG(1) < 0)
2672 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
2673
2674 if (!mumps->myid) {
2675 ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
2676 ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2677 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2678 }
2679 PetscFunctionReturn(0);
2680 }
2681
2682 /*@
2683 MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2684
2685 Logically Collective on Mat
2686
2687 Input Parameters:
2688 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2689 - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2690
2691 Output Parameter:
2692 . spRHS - requested entries of inverse of A
2693
2694 Level: beginner
2695
2696 References:
2697 . MUMPS Users' Guide
2698
2699 .seealso: MatGetFactor(), MatCreateTranspose()
2700 @*/
MatMumpsGetInverse(Mat F,Mat spRHS)2701 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2702 {
2703 PetscErrorCode ierr;
2704
2705 PetscFunctionBegin;
2706 PetscValidType(F,1);
2707 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2708 ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
2709 PetscFunctionReturn(0);
2710 }
2711
MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)2712 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2713 {
2714 PetscErrorCode ierr;
2715 Mat spRHS;
2716
2717 PetscFunctionBegin;
2718 ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
2719 ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
2720 ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
2721 PetscFunctionReturn(0);
2722 }
2723
2724 /*@
2725 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2726
2727 Logically Collective on Mat
2728
2729 Input Parameters:
2730 + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2731 - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2732
2733 Output Parameter:
2734 . spRHST - requested entries of inverse of A^T
2735
2736 Level: beginner
2737
2738 References:
2739 . MUMPS Users' Guide
2740
2741 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2742 @*/
MatMumpsGetInverseTranspose(Mat F,Mat spRHST)2743 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2744 {
2745 PetscErrorCode ierr;
2746 PetscBool flg;
2747
2748 PetscFunctionBegin;
2749 PetscValidType(F,1);
2750 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2751 ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
2752 if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2753
2754 ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
2755 PetscFunctionReturn(0);
2756 }
2757
2758 /*@
2759 MatMumpsGetInfo - Get MUMPS parameter INFO()
2760
2761 Logically Collective on Mat
2762
2763 Input Parameters:
2764 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2765 - icntl - index of MUMPS parameter array INFO()
2766
2767 Output Parameter:
2768 . ival - value of MUMPS INFO(icntl)
2769
2770 Level: beginner
2771
2772 References:
2773 . MUMPS Users' Guide
2774
2775 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2776 @*/
MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt * ival)2777 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2778 {
2779 PetscErrorCode ierr;
2780
2781 PetscFunctionBegin;
2782 PetscValidType(F,1);
2783 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2784 PetscValidIntPointer(ival,3);
2785 ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2786 PetscFunctionReturn(0);
2787 }
2788
2789 /*@
2790 MatMumpsGetInfog - Get MUMPS parameter INFOG()
2791
2792 Logically Collective on Mat
2793
2794 Input Parameters:
2795 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2796 - icntl - index of MUMPS parameter array INFOG()
2797
2798 Output Parameter:
2799 . ival - value of MUMPS INFOG(icntl)
2800
2801 Level: beginner
2802
2803 References:
2804 . MUMPS Users' Guide
2805
2806 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2807 @*/
MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt * ival)2808 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2809 {
2810 PetscErrorCode ierr;
2811
2812 PetscFunctionBegin;
2813 PetscValidType(F,1);
2814 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2815 PetscValidIntPointer(ival,3);
2816 ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2817 PetscFunctionReturn(0);
2818 }
2819
2820 /*@
2821 MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2822
2823 Logically Collective on Mat
2824
2825 Input Parameters:
2826 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2827 - icntl - index of MUMPS parameter array RINFO()
2828
2829 Output Parameter:
2830 . val - value of MUMPS RINFO(icntl)
2831
2832 Level: beginner
2833
2834 References:
2835 . MUMPS Users' Guide
2836
2837 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2838 @*/
MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal * val)2839 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2840 {
2841 PetscErrorCode ierr;
2842
2843 PetscFunctionBegin;
2844 PetscValidType(F,1);
2845 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2846 PetscValidRealPointer(val,3);
2847 ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2848 PetscFunctionReturn(0);
2849 }
2850
2851 /*@
2852 MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2853
2854 Logically Collective on Mat
2855
2856 Input Parameters:
2857 + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2858 - icntl - index of MUMPS parameter array RINFOG()
2859
2860 Output Parameter:
2861 . val - value of MUMPS RINFOG(icntl)
2862
2863 Level: beginner
2864
2865 References:
2866 . MUMPS Users' Guide
2867
2868 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2869 @*/
MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal * val)2870 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2871 {
2872 PetscErrorCode ierr;
2873
2874 PetscFunctionBegin;
2875 PetscValidType(F,1);
2876 if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2877 PetscValidRealPointer(val,3);
2878 ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2879 PetscFunctionReturn(0);
2880 }
2881
2882 /*MC
2883 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
2884 distributed and sequential matrices via the external package MUMPS.
2885
2886 Works with MATAIJ and MATSBAIJ matrices
2887
2888 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2889
2890 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2891
2892 Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2893
2894 Options Database Keys:
2895 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2896 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2897 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
2898 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
2899 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2900 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=PETSc (sequential only) 3=Scotch, 4=PORD, 5=Metis
2901 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
2902 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements
2903 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2904 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2905 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2906 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
2907 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
2908 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2909 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
. -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2911 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
2912 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis
. -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2914 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering
2915 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2916 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2917 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2918 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2919 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2920 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2921 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2922 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
2923 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
2924 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2925 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2926 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2927 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2928 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2929 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2930
2931 If run sequentially can use the PETSc provided ordering with the option -mat_mumps_icntl_7 1
2932
2933 Level: beginner
2934
2935 Notes:
2936 MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
2937
2938 When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2939 $ KSPGetPC(ksp,&pc);
2940 $ PCFactorGetMatrix(pc,&mat);
2941 $ MatMumpsGetInfo(mat,....);
2942 $ MatMumpsGetInfog(mat,....); etc.
2943 Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2944
2945 Two modes to run MUMPS/PETSc with OpenMP
2946
2947 $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2948 $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2949
2950 $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2951 $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2952
2953 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2954 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2955 (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2956 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2958
2959 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2960 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2961 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slave ranks are not. We free the CPUs assigned to slaves (as might be set
2963 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2964 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
2965 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
2966 MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
2967 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
2968 problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2969 For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2970 examine the mapping result.
2971
2972 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2973 for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2974 calls omp_set_num_threads(m) internally before calling MUMPS.
2975
2976 References:
2977 + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2978 - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2979
2980 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()
2981
2982 M*/
2983
/* Reports the solver package name (MATSOLVERMUMPS) for a factor matrix created by this interface */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}
2990
2991 /* MatGetFactor for Seq and MPI AIJ matrices */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* A complex Hermitian (non-symmetric) matrix cannot be factored by MUMPS Cholesky */
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  /* Allocate the MUMPS private context; stored in B->data below */
  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods so MatMumpsXXX()/MatFactorXXX() calls on B dispatch here */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* The converter maps PETSc's (S)AIJ storage to the MUMPS triples (coordinate) format */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* MUMPS SYM=2: general symmetric */
#else
    if (A->spd_set && A->spd) mumps->sym = 1; /* MUMPS SYM=1: symmetric positive definite */
    else                      mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */
    B->useordering = PETSC_TRUE;
  }

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3066
3067 /* MatGetFactor for Seq and MPI SBAIJ matrices */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* A complex Hermitian (non-symmetric) matrix cannot be factored by MUMPS Cholesky */
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
#endif
  /* Create the factorization matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  /* Allocate the MUMPS private context; stored in B->data below.
     The converter maps PETSc's SBAIJ storage to the MUMPS triples (coordinate) format */
  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods so MatMumpsXXX()/MatFactorXXX() calls on B dispatch here */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* SBAIJ input only supports Cholesky */
  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2; /* MUMPS SYM=2: general symmetric */
#else
  if (A->spd_set && A->spd) mumps->sym = 1; /* MUMPS SYM=1: symmetric positive definite */
  else                      mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */
    B->useordering = PETSC_TRUE;
  }

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3136
MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat * F)3137 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3138 {
3139 Mat B;
3140 PetscErrorCode ierr;
3141 Mat_MUMPS *mumps;
3142 PetscBool isSeqBAIJ;
3143 PetscMPIInt size;
3144
3145 PetscFunctionBegin;
3146 /* Create the factorization matrix */
3147 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
3148 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3149 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3150 ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3151 ierr = MatSetUp(B);CHKERRQ(ierr);
3152
3153 ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3154 if (ftype == MAT_FACTOR_LU) {
3155 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3156 B->factortype = MAT_FACTOR_LU;
3157 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3158 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3159 mumps->sym = 0;
3160 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");
3161
3162 B->ops->view = MatView_MUMPS;
3163 B->ops->getinfo = MatGetInfo_MUMPS;
3164
3165 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3166 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3167 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3168 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3169 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3170 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3171 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3172 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3173 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3174 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3175 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3176 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3177 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3178
3179 /* set solvertype */
3180 ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3181 ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3182 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3183 if (size == 1) {
3184 /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */
3185 B->useordering = PETSC_TRUE;
3186 }
3187
3188 B->ops->destroy = MatDestroy_MUMPS;
3189 B->data = (void*)mumps;
3190
3191 ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3192
3193 *F = B;
3194 PetscFunctionReturn(0);
3195 }
3196
3197 /* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  /* Allocate the MUMPS private context; stored in B->data below */
  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods so MatMumpsXXX()/MatFactorXXX() calls on B dispatch here.
     NOTE(review): unlike the AIJ/BAIJ/SBAIJ factories, MatMumpsGetInverse_C and
     MatMumpsGetInverseTranspose_C are not composed here -- confirm whether that is intentional */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  /* Only sequential SELL with LU is implemented; other combinations error out below */
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* MUMPS option can use ordering with "-mat_mumps_icntl_7 1 when sequential so generate the ordering, even if it may not be used */
    B->useordering = PETSC_TRUE;
  }

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3256
MatSolverTypeRegister_MUMPS(void)3257 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3258 {
3259 PetscErrorCode ierr;
3260
3261 PetscFunctionBegin;
3262 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3263 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3264 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3265 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3266 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3267 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3268 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3269 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3270 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3271 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3272 ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
3273 PetscFunctionReturn(0);
3274 }
3275
3276