1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

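   Example Usage:
   A minimal sketch of the recommendation above (comm, m, n, M, N are placeholder communicator and sizes);
   calling both preallocation routines lets the same code run on one process or on many, since only the
   routine matching the actual type takes effect:
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,NULL);        /* used when comm has a single process */
   MatMPIAIJSetPreallocation(A,5,NULL,2,NULL); /* used when comm has multiple processes */
   /* ... MatSetValues() calls ... */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
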
   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to using inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
29 
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

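   Example Usage:
   A minimal sketch (comm, m, n, M, N are placeholders); selecting the type from the options database with
   -mat_type aijcrl then yields MATSEQAIJCRL on one process and MATMPIAIJCRL on several:
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetFromOptions(A);
.ve
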
  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
46 
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92   ia        = a->i;
93   ib        = b->i;
94   for (i=0; i<m; i++) {
95     na = ia[i+1] - ia[i];
96     nb = ib[i+1] - ib[i];
97     if (!na && !nb) {
98       cnt++;
99       goto ok1;
100     }
101     aa = a->a + ia[i];
102     for (j=0; j<na; j++) {
103       if (aa[j] != 0.0) goto ok1;
104     }
105     bb = b->a + ib[i];
106     for (j=0; j <nb; j++) {
107       if (bb[j] != 0.0) goto ok1;
108     }
109     cnt++;
110 ok1:;
111   }
112   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113   if (!n0rows) PetscFunctionReturn(0);
114   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115   cnt  = 0;
116   for (i=0; i<m; i++) {
117     na = ia[i+1] - ia[i];
118     nb = ib[i+1] - ib[i];
119     if (!na && !nb) continue;
120     aa = a->a + ia[i];
121     for (j=0; j<na;j++) {
122       if (aa[j] != 0.0) {
123         rows[cnt++] = rstart + i;
124         goto ok2;
125       }
126     }
127     bb = b->a + ib[i];
128     for (j=0; j<nb; j++) {
129       if (bb[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134 ok2:;
135   }
136   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142   PetscErrorCode    ierr;
143   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
144   PetscBool         cong;
145 
146   PetscFunctionBegin;
147   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148   if (Y->assembled && cong) {
149     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150   } else {
151     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152   }
153   PetscFunctionReturn(0);
154 }
155 
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
159   PetscErrorCode ierr;
160   PetscInt       i,rstart,nrows,*rows;
161 
162   PetscFunctionBegin;
163   *zrows = NULL;
164   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166   for (i=0; i<nrows; i++) rows[i] += rstart;
167   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173   PetscErrorCode ierr;
174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
175   PetscInt       i,n,*garray = aij->garray;
176   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
177   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
178   PetscReal      *work;
179 
180   PetscFunctionBegin;
181   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183   if (type == NORM_2) {
184     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186     }
187     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189     }
190   } else if (type == NORM_1) {
191     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193     }
194     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196     }
197   } else if (type == NORM_INFINITY) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203     }
204 
205   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206   if (type == NORM_INFINITY) {
207     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208   } else {
209     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210   }
211   ierr = PetscFree(work);CHKERRQ(ierr);
212   if (type == NORM_2) {
213     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214   }
215   PetscFunctionReturn(0);
216 }
217 
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
221   IS              sis,gis;
222   PetscErrorCode  ierr;
223   const PetscInt  *isis,*igis;
224   PetscInt        n,*iis,nsis,ngis,rstart,i;
225 
226   PetscFunctionBegin;
227   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233 
234   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237   n    = ngis + nsis;
238   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240   for (i=0; i<n; i++) iis[i] += rstart;
241   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242 
243   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245   ierr = ISDestroy(&sis);CHKERRQ(ierr);
246   ierr = ISDestroy(&gis);CHKERRQ(ierr);
247   PetscFunctionReturn(0);
248 }
249 
/*
    Distributes a SeqAIJ matrix across a set of processes. The code is copied from
    MatLoad_MPIAIJ(); this duplication should eventually be replaced by a reusable routine for each matrix type.

    Only for square matrices.

    Used by a preconditioner, hence PETSC_EXTERN
*/
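/*
   A minimal usage sketch (comm, rank, m, and gmat are placeholders): rank 0 supplies a square global
   MATSEQAIJ matrix gmat, every rank passes its desired number of local rows m, and the distributed
   MATMPIAIJ matrix is returned in dist.

     Mat dist;
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);

   With MAT_REUSE_MATRIX and an unchanged sparsity pattern, only the numerical values are moved from
   process 0 into the existing distributed matrix:

     MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);
*/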
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260   PetscMPIInt    rank,size;
261   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262   PetscErrorCode ierr;
263   Mat            mat;
264   Mat_SeqAIJ     *gmata;
265   PetscMPIInt    tag;
266   MPI_Status     status;
267   PetscBool      aij;
268   MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;
269 
270   PetscFunctionBegin;
271   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273   if (!rank) {
274     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276   }
277   if (reuse == MAT_INITIAL_MATRIX) {
278     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287 
288     rowners[0] = 0;
289     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290     rstart = rowners[rank];
291     rend   = rowners[rank+1];
292     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293     if (!rank) {
294       gmata = (Mat_SeqAIJ*) gmat->data;
295       /* send row lengths to all processors */
296       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297       for (i=1; i<size; i++) {
298         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299       }
      /* determine the number of diagonal and off-diagonal entries in each row */
301       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303       jj   = 0;
304       for (i=0; i<m; i++) {
305         for (j=0; j<dlens[i]; j++) {
306           if (gmata->j[jj] < rstart) ld[i]++;
307           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308           jj++;
309         }
310       }
311       /* send column indices to other processes */
312       for (i=1; i<size; i++) {
313         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316       }
317 
318       /* send numerical values to other processes */
319       for (i=1; i<size; i++) {
320         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322       }
323       gmataa = gmata->a;
324       gmataj = gmata->j;
325 
326     } else {
327       /* receive row lengths */
328       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329       /* receive column indices */
330       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
334       ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336       jj   = 0;
337       for (i=0; i<m; i++) {
338         for (j=0; j<dlens[i]; j++) {
339           if (gmataj[jj] < rstart) ld[i]++;
340           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341           jj++;
342         }
343       }
344       /* receive numerical values */
345       ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347     }
348     /* set preallocation */
349     for (i=0; i<m; i++) {
350       dlens[i] -= olens[i];
351     }
352     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354 
355     for (i=0; i<m; i++) {
356       dlens[i] += olens[i];
357     }
358     cnt = 0;
359     for (i=0; i<m; i++) {
360       row  = rstart + i;
361       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362       cnt += dlens[i];
363     }
364     if (rank) {
365       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366     }
367     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368     ierr = PetscFree(rowners);CHKERRQ(ierr);
369 
370     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371 
372     *inmat = mat;
373   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
374     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376     mat  = *inmat;
377     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378     if (!rank) {
379       /* send numerical values to other processes */
380       gmata  = (Mat_SeqAIJ*) gmat->data;
381       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382       gmataa = gmata->a;
383       for (i=1; i<size; i++) {
384         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386       }
387       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388     } else {
389       /* receive numerical values from process 0*/
390       nz   = Ad->nz + Ao->nz;
391       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393     }
394     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396     ad = Ad->a;
397     ao = Ao->a;
398     if (mat->rmap->n) {
399       i  = 0;
400       nz = ld[i];                                   ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402     }
403     for (i=1; i<mat->rmap->n; i++) {
404       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406     }
407     i--;
408     if (mat->rmap->n) {
409       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410     }
411     if (rank) {
412       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413     }
414   }
415   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  holds an integer array of order N, although access is fast).
*/
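/*
   A sketch (gcol is a placeholder for a global column index) of how the colmap built here is consumed
   later in this file; entries are stored shifted by one so that zero can mean "not found", hence a
   negative result after the shift means the column is absent from the off-diagonal block:

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/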
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
430   PetscErrorCode ierr;
431   PetscInt       n = aij->B->cmap->n,i;
432 
433   PetscFunctionBegin;
434   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437   for (i=0; i<n; i++) {
438     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439   }
440 #else
441   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445   PetscFunctionReturn(0);
446 }
447 
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
449 { \
450     if (col <= lastcol1)  low1 = 0;     \
451     else                 high1 = nrow1; \
452     lastcol1 = col;\
453     while (high1-low1 > 5) { \
454       t = (low1+high1)/2; \
455       if (rp1[t] > col) high1 = t; \
456       else              low1  = t; \
457     } \
458       for (_i=low1; _i<high1; _i++) { \
459         if (rp1[_i] > col) break; \
460         if (rp1[_i] == col) { \
461           if (addv == ADD_VALUES) { \
462             ap1[_i] += value;   \
            /* Not sure whether LogFlops will slow down the code or not */ \
464             (void)PetscLogFlops(1.0);   \
465            } \
466           else                    ap1[_i] = value; \
467           inserted = PETSC_TRUE; \
468           goto a_noinsert; \
469         } \
470       }  \
471       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
473       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475       N = nrow1++ - 1; a->nz++; high1++; \
476       /* shift up all the later entries in this row */ \
477       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479       rp1[_i] = col;  \
480       ap1[_i] = value;  \
481       A->nonzerostate++;\
482       a_noinsert: ; \
483       ailen[row] = nrow1; \
484 }
485 
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487   { \
488     if (col <= lastcol2) low2 = 0;                        \
489     else high2 = nrow2;                                   \
490     lastcol2 = col;                                       \
491     while (high2-low2 > 5) {                              \
492       t = (low2+high2)/2;                                 \
493       if (rp2[t] > col) high2 = t;                        \
494       else             low2  = t;                         \
495     }                                                     \
496     for (_i=low2; _i<high2; _i++) {                       \
497       if (rp2[_i] > col) break;                           \
498       if (rp2[_i] == col) {                               \
499         if (addv == ADD_VALUES) {                         \
500           ap2[_i] += value;                               \
501           (void)PetscLogFlops(1.0);                       \
502         }                                                 \
503         else                    ap2[_i] = value;          \
504         inserted = PETSC_TRUE;                            \
505         goto b_noinsert;                                  \
506       }                                                   \
507     }                                                     \
508     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
510     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512     N = nrow2++ - 1; b->nz++; high2++;                    \
513     /* shift up all the later entries in this row */      \
514     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516     rp2[_i] = col;                                        \
517     ap2[_i] = value;                                      \
518     B->nonzerostate++;                                    \
519     b_noinsert: ;                                         \
520     bilen[row] = nrow2;                                   \
521   }
522 
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
526   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527   PetscErrorCode ierr;
528   PetscInt       l,*garray = mat->garray,diag;
529 
530   PetscFunctionBegin;
531   /* code only works for square matrices A */
532 
533   /* find size of row to the left of the diagonal part */
534   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535   row  = row - diag;
536   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537     if (garray[b->j[b->i[row]+l]] > diag) break;
538   }
539   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540 
541   /* diagonal part */
542   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543 
544   /* right of diagonal part */
545   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_DEVICE)
547   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549   PetscFunctionReturn(0);
550 }
551 
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
555   PetscScalar    value = 0.0;
556   PetscErrorCode ierr;
557   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
558   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559   PetscBool      roworiented = aij->roworiented;
560 
561   /* Some Variables required in the macro */
562   Mat        A                    = aij->A;
563   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
564   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565   MatScalar  *aa                  = a->a;
566   PetscBool  ignorezeroentries    = a->ignorezeroentries;
567   Mat        B                    = aij->B;
568   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
569   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570   MatScalar  *ba                  = b->a;
571   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572    * cannot use "#if defined" inside a macro. */
573   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574 
575   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576   PetscInt  nonew;
577   MatScalar *ap1,*ap2;
578 
579   PetscFunctionBegin;
580   for (i=0; i<m; i++) {
581     if (im[i] < 0) continue;
582     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583     if (im[i] >= rstart && im[i] < rend) {
584       row      = im[i] - rstart;
585       lastcol1 = -1;
586       rp1      = aj + ai[row];
587       ap1      = aa + ai[row];
588       rmax1    = aimax[row];
589       nrow1    = ailen[row];
590       low1     = 0;
591       high1    = nrow1;
592       lastcol2 = -1;
593       rp2      = bj + bi[row];
594       ap2      = ba + bi[row];
595       rmax2    = bimax[row];
596       nrow2    = bilen[row];
597       low2     = 0;
598       high2    = nrow2;
599 
600       for (j=0; j<n; j++) {
601         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
602         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603         if (in[j] >= cstart && in[j] < cend) {
604           col   = in[j] - cstart;
605           nonew = a->nonew;
606           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_DEVICE)
608           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610         } else if (in[j] < 0) continue;
611         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612         else {
613           if (mat->was_assembled) {
614             if (!aij->colmap) {
615               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616             }
617 #if defined(PETSC_USE_CTABLE)
618             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619             col--;
620 #else
621             col = aij->colmap[in[j]] - 1;
622 #endif
623             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625               col  =  in[j];
626               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627               B        = aij->B;
628               b        = (Mat_SeqAIJ*)B->data;
629               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630               rp2      = bj + bi[row];
631               ap2      = ba + bi[row];
632               rmax2    = bimax[row];
633               nrow2    = bilen[row];
634               low2     = 0;
635               high2    = nrow2;
636               bm       = aij->B->rmap->n;
637               ba       = b->a;
638               inserted = PETSC_FALSE;
639             } else if (col < 0) {
640               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643             }
644           } else col = in[j];
645           nonew = b->nonew;
646           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_DEVICE)
648           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650         }
651       }
652     } else {
653       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654       if (!aij->donotstash) {
655         mat->assembled = PETSC_FALSE;
656         if (roworiented) {
657           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658         } else {
659           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660         }
661       }
662     }
663   }
664   PetscFunctionReturn(0);
665 }
666 
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted, and within each row the values in mat_j have to be sorted (CSR-like).
    No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
*/
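/*
   A small sketch of the expected input (values are illustrative only): for a process owning two local
   rows with global column ownership [cstart,cend) = [4,8), the CSR arrays

     mat_i[] = {0,3,5};
     mat_j[] = {4,6,9, 5,11};

   describe row 0 with entries in global columns 4,6,9 and row 1 with entries in columns 5,11; columns
   4, 6, and 5 land in the diagonal block A (stored shifted by cstart) and columns 9 and 11 in the
   off-diagonal block B.  The numeric companion MatSetValues_MPIAIJ_CopyFromCSRFormat() below copies
   the values the same way.
*/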
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
675   Mat            A           = aij->A; /* diagonal part of the matrix */
676   Mat            B           = aij->B; /* offdiagonal part of the matrix */
677   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
678   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
679   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
680   PetscInt       *ailen      = a->ilen,*aj = a->j;
681   PetscInt       *bilen      = b->ilen,*bj = b->j;
682   PetscInt       am          = aij->A->rmap->n,j;
683   PetscInt       diag_so_far = 0,dnz;
684   PetscInt       offd_so_far = 0,onz;
685 
686   PetscFunctionBegin;
687   /* Iterate over all rows of the matrix */
688   for (j=0; j<am; j++) {
689     dnz = onz = 0;
690     /*  Iterate over all non-zero columns of the current row */
691     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692       /* If column is in the diagonal */
693       if (mat_j[col] >= cstart && mat_j[col] < cend) {
694         aj[diag_so_far++] = mat_j[col] - cstart;
695         dnz++;
696       } else { /* off-diagonal entries */
697         bj[offd_so_far++] = mat_j[col];
698         onz++;
699       }
700     }
701     ailen[j] = dnz;
702     bilen[j] = onz;
703   }
704   PetscFunctionReturn(0);
705 }
706 
/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted, and within each row the values in mat_j have to be sorted (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be correct and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
717   Mat            A      = aij->A; /* diagonal part of the matrix */
718   Mat            B      = aij->B; /* offdiagonal part of the matrix */
719   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
721   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
722   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723   PetscInt       *ailen = a->ilen,*aj = a->j;
724   PetscInt       *bilen = b->ilen,*bj = b->j;
725   PetscInt       am     = aij->A->rmap->n,j;
726   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728   PetscScalar    *aa = a->a,*ba = b->a;
729 
730   PetscFunctionBegin;
731   /* Iterate over all rows of the matrix */
732   for (j=0; j<am; j++) {
733     dnz_row = onz_row = 0;
734     rowstart_offd = full_offd_i[j];
735     rowstart_diag = full_diag_i[j];
736     /*  Iterate over all non-zero columns of the current row */
737     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738       /* If column is in the diagonal */
739       if (mat_j[col] >= cstart && mat_j[col] < cend) {
740         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741         aa[rowstart_diag+dnz_row] = mat_a[col];
742         dnz_row++;
743       } else { /* off-diagonal entries */
744         bj[rowstart_offd+onz_row] = mat_j[col];
745         ba[rowstart_offd+onz_row] = mat_a[col];
746         onz_row++;
747       }
748     }
749     ailen[j] = dnz_row;
750     bilen[j] = onz_row;
751   }
752   PetscFunctionReturn(0);
753 }
754 
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
758   PetscErrorCode ierr;
759   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761 
762   PetscFunctionBegin;
763   for (i=0; i<m; i++) {
764     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766     if (idxm[i] >= rstart && idxm[i] < rend) {
767       row = idxm[i] - rstart;
768       for (j=0; j<n; j++) {
769         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771         if (idxn[j] >= cstart && idxn[j] < cend) {
772           col  = idxn[j] - cstart;
773           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774         } else {
775           if (!aij->colmap) {
776             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777           }
778 #if defined(PETSC_USE_CTABLE)
779           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780           col--;
781 #else
782           col = aij->colmap[idxn[j]] - 1;
783 #endif
784           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785           else {
786             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787           }
788         }
789       }
790     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791   }
792   PetscFunctionReturn(0);
793 }
794 
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800 
801   PetscFunctionBegin;
802   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
803 
804   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
805   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
806   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
807   PetscFunctionReturn(0);
808 }
809 
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
811 {
812   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
813   PetscErrorCode ierr;
814   PetscMPIInt    n;
815   PetscInt       i,j,rstart,ncols,flg;
816   PetscInt       *row,*col;
817   PetscBool      other_disassembled;
818   PetscScalar    *val;
819 
820   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
821 
822   PetscFunctionBegin;
823   if (!aij->donotstash && !mat->nooffprocentries) {
824     while (1) {
825       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
826       if (!flg) break;
827 
828       for (i=0; i<n;) {
829         /* Now identify the consecutive vals belonging to the same row */
830         for (j=i,rstart=row[j]; j<n; j++) {
831           if (row[j] != rstart) break;
832         }
833         if (j < n) ncols = j-i;
834         else       ncols = n-i;
835         /* Now assemble all these values with a single function call */
836         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
837         i    = j;
838       }
839     }
840     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
841   }
842 #if defined(PETSC_HAVE_DEVICE)
843   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
844   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
845   if (mat->boundtocpu) {
846     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
847     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
848   }
849 #endif
850   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
851   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
852 
  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves so that we may reassemble */
  /*
     if the nonzero structure of the submatrix B cannot change, then we know that
     no processor disassembled and thus we can skip this step
  */
859   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
860     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
861     if (mat->was_assembled && !other_disassembled) {
862 #if defined(PETSC_HAVE_DEVICE)
863       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
864 #endif
865       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
866     }
867   }
868   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
869     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
870   }
871   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
872 #if defined(PETSC_HAVE_DEVICE)
873   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
874 #endif
875   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
876   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
877 
878   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
879 
880   aij->rowvalues = NULL;
881 
882   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
883 
884   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
885   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
886     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
887     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
888   }
889 #if defined(PETSC_HAVE_DEVICE)
890   mat->offloadmask = PETSC_OFFLOAD_BOTH;
891 #endif
892   PetscFunctionReturn(0);
893 }
894 
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
896 {
897   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
898   PetscErrorCode ierr;
899 
900   PetscFunctionBegin;
901   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
902   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905 
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
909   PetscObjectState sA, sB;
910   PetscInt        *lrows;
911   PetscInt         r, len;
912   PetscBool        cong, lch, gch;
913   PetscErrorCode   ierr;
914 
915   PetscFunctionBegin;
916   /* get locally owned rows */
917   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
918   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
919   /* fix right hand side if needed */
920   if (x && b) {
921     const PetscScalar *xx;
922     PetscScalar       *bb;
923 
924     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
925     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
926     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
927     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
928     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
929     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
930   }
931 
932   sA = mat->A->nonzerostate;
933   sB = mat->B->nonzerostate;
934 
935   if (diag != 0.0 && cong) {
936     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
937     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
938   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
939     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
940     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
941     PetscInt   nnwA, nnwB;
942     PetscBool  nnzA, nnzB;
943 
944     nnwA = aijA->nonew;
945     nnwB = aijB->nonew;
946     nnzA = aijA->keepnonzeropattern;
947     nnzB = aijB->keepnonzeropattern;
948     if (!nnzA) {
949       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
950       aijA->nonew = 0;
951     }
952     if (!nnzB) {
953       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
954       aijB->nonew = 0;
955     }
956     /* Must zero here before the next loop */
957     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
958     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
959     for (r = 0; r < len; ++r) {
960       const PetscInt row = lrows[r] + A->rmap->rstart;
961       if (row >= A->cmap->N) continue;
962       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
963     }
964     aijA->nonew = nnwA;
965     aijB->nonew = nnwB;
966   } else {
967     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
968     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
969   }
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973 
974   /* reduce nonzerostate */
975   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
976   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
977   if (gch) A->nonzerostate++;
978   PetscFunctionReturn(0);
979 }
980 
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
982 {
983   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode    ierr;
985   PetscMPIInt       n = A->rmap->n;
986   PetscInt          i,j,r,m,len = 0;
987   PetscInt          *lrows,*owners = A->rmap->range;
988   PetscMPIInt       p = 0;
989   PetscSFNode       *rrows;
990   PetscSF           sf;
991   const PetscScalar *xx;
992   PetscScalar       *bb,*mask;
993   Vec               xmask,lmask;
994   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
995   const PetscInt    *aj, *ii,*ridx;
996   PetscScalar       *aa;
997 
998   PetscFunctionBegin;
999   /* Create SF where leaves are input rows and roots are owned rows */
1000   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1001   for (r = 0; r < n; ++r) lrows[r] = -1;
1002   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1003   for (r = 0; r < N; ++r) {
1004     const PetscInt idx   = rows[r];
1005     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1006     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1007       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1008     }
1009     rrows[r].rank  = p;
1010     rrows[r].index = rows[r] - owners[p];
1011   }
1012   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1013   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1014   /* Collect flags for rows to be zeroed */
1015   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1016   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1017   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1018   /* Compress and put in row numbers */
1019   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1020   /* zero diagonal part of matrix */
1021   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1022   /* handle off diagonal part of matrix */
1023   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x && b) { /* this code is buggy when the row and column layout don't match */
1032     PetscBool cong;
1033 
1034     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1035     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1036     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1039     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1040   }
1041   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1042   /* remove zeroed rows of off diagonal matrix */
1043   ii = aij->i;
1044   for (i=0; i<len; i++) {
1045     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1046   }
1047   /* loop over all elements of off process part of matrix zeroing removed columns*/
1048   if (aij->compressedrow.use) {
1049     m    = aij->compressedrow.nrows;
1050     ii   = aij->compressedrow.i;
1051     ridx = aij->compressedrow.rindex;
1052     for (i=0; i<m; i++) {
1053       n  = ii[i+1] - ii[i];
1054       aj = aij->j + ii[i];
1055       aa = aij->a + ii[i];
1056 
1057       for (j=0; j<n; j++) {
1058         if (PetscAbsScalar(mask[*aj])) {
1059           if (b) bb[*ridx] -= *aa*xx[*aj];
1060           *aa = 0.0;
1061         }
1062         aa++;
1063         aj++;
1064       }
1065       ridx++;
1066     }
1067   } else { /* do not use compressed row format */
1068     m = l->B->rmap->n;
1069     for (i=0; i<m; i++) {
1070       n  = ii[i+1] - ii[i];
1071       aj = aij->j + ii[i];
1072       aa = aij->a + ii[i];
1073       for (j=0; j<n; j++) {
1074         if (PetscAbsScalar(mask[*aj])) {
1075           if (b) bb[i] -= *aa*xx[*aj];
1076           *aa = 0.0;
1077         }
1078         aa++;
1079         aj++;
1080       }
1081     }
1082   }
1083   if (x && b) {
1084     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1085     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1086   }
1087   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1088   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1089   ierr = PetscFree(lrows);CHKERRQ(ierr);
1090 
1091   /* only change matrix nonzero state if pattern was allowed to be changed */
1092   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1093     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1094     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1100 {
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102   PetscErrorCode ierr;
1103   PetscInt       nt;
1104   VecScatter     Mvctx = a->Mvctx;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1108   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1109   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1111   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1127 {
1128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1129   PetscErrorCode ierr;
1130   VecScatter     Mvctx = a->Mvctx;
1131 
1132   PetscFunctionBegin;
1133   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1134   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1135   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1136   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1137   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1142 {
1143   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1144   PetscErrorCode ierr;
1145 
1146   PetscFunctionBegin;
1147   /* do nondiagonal part */
1148   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1149   /* do local part */
1150   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1151   /* add partial results together */
1152   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1158 {
1159   MPI_Comm       comm;
1160   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1161   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162   IS             Me,Notme;
1163   PetscErrorCode ierr;
1164   PetscInt       M,N,first,last,*notme,i;
1165   PetscBool      lf;
1166   PetscMPIInt    size;
1167 
1168   PetscFunctionBegin;
1169   /* Easy test: symmetric diagonal block */
1170   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1171   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1172   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1173   if (!*f) PetscFunctionReturn(0);
1174   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1175   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1176   if (size == 1) PetscFunctionReturn(0);
1177 
1178   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1179   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1180   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1181   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1182   for (i=0; i<first; i++) notme[i] = i;
1183   for (i=last; i<M; i++) notme[i-last+first] = i;
1184   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1185   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1186   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1187   Aoff = Aoffs[0];
1188   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1189   Boff = Boffs[0];
1190   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1191   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1192   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1193   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1194   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1195   ierr = PetscFree(notme);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
1200 {
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1209 {
1210   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   /* do nondiagonal part */
1215   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1216   /* do local part */
1217   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1218   /* add partial results together */
1219   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 /*
1225   This only works correctly for square matrices where the subblock A->A is the
1226    diagonal block
1227 */
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1229 {
1230   PetscErrorCode ierr;
1231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1232 
1233   PetscFunctionBegin;
1234   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1235   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1236   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1241 {
1242   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1243   PetscErrorCode ierr;
1244 
1245   PetscFunctionBegin;
1246   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1247   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1254   PetscErrorCode ierr;
1255 
1256   PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined(PETSC_USE_CTABLE)
1265   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1273   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1274   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1275   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1276 
1277   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1278   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1279 
1280   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1290 #if defined(PETSC_HAVE_CUDA)
1291   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1292 #endif
1293 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1295 #endif
1296 #if defined(PETSC_HAVE_ELEMENTAL)
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1298 #endif
1299 #if defined(PETSC_HAVE_SCALAPACK)
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1301 #endif
1302 #if defined(PETSC_HAVE_HYPRE)
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1305 #endif
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1312 #if defined(PETSC_HAVE_MKL_SPARSE)
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1314 #endif
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1322 {
1323   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1324   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1325   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1326   const PetscInt    *garray = aij->garray;
1327   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1328   PetscInt          *rowlens;
1329   PetscInt          *colidxs;
1330   PetscScalar       *matvals;
1331   PetscErrorCode    ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1335 
1336   M  = mat->rmap->N;
1337   N  = mat->cmap->N;
1338   m  = mat->rmap->n;
1339   rs = mat->rmap->rstart;
1340   cs = mat->cmap->rstart;
1341   nz = A->nz + B->nz;
1342 
1343   /* write matrix header */
1344   header[0] = MAT_FILE_CLASSID;
1345   header[1] = M; header[2] = N; header[3] = nz;
1346   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1347   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1348 
1349   /* fill in and store row lengths  */
1350   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1351   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1353   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1354 
1355   /* fill in and store column indices */
1356   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1357   for (cnt=0, i=0; i<m; i++) {
1358     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1359       if (garray[B->j[jb]] > cs) break;
1360       colidxs[cnt++] = garray[B->j[jb]];
1361     }
1362     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1363       colidxs[cnt++] = A->j[ja] + cs;
1364     for (; jb<B->i[i+1]; jb++)
1365       colidxs[cnt++] = garray[B->j[jb]];
1366   }
1367   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1368   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1369   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1370 
1371   /* fill in and store nonzero values */
1372   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1373   for (cnt=0, i=0; i<m; i++) {
1374     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1375       if (garray[B->j[jb]] > cs) break;
1376       matvals[cnt++] = B->a[jb];
1377     }
1378     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1379       matvals[cnt++] = A->a[ja];
1380     for (; jb<B->i[i+1]; jb++)
1381       matvals[cnt++] = B->a[jb];
1382   }
1383   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1384   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1385   ierr = PetscFree(matvals);CHKERRQ(ierr);
1386 
1387   /* write block size option to the viewer's .info file */
1388   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
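/*
   Editor's usage sketch (not compiled as part of this file): the binary writer above is
   normally reached by viewing the matrix through a binary viewer; the resulting file can
   be read back with MatLoad().  The file name "matrix.dat" is an arbitrary illustrative
   choice.

      PetscViewer    viewer;
      PetscErrorCode ierr;

      ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
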
1392 #include <petscdraw.h>
1393 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1394 {
1395   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1396   PetscErrorCode    ierr;
1397   PetscMPIInt       rank = aij->rank,size = aij->size;
1398   PetscBool         isdraw,iascii,isbinary;
1399   PetscViewer       sviewer;
1400   PetscViewerFormat format;
1401 
1402   PetscFunctionBegin;
1403   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1404   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1405   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1406   if (iascii) {
1407     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1408     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1409       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1410       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1411       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1412       for (i=0; i<(PetscInt)size; i++) {
1413         nmax = PetscMax(nmax,nz[i]);
1414         nmin = PetscMin(nmin,nz[i]);
1415         navg += nz[i];
1416       }
1417       ierr = PetscFree(nz);CHKERRQ(ierr);
1418       navg = navg/size;
1419       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1420       PetscFunctionReturn(0);
1421     }
1422     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1423     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1424       MatInfo   info;
1425       PetscBool inodes;
1426 
1427       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1428       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1429       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1431       if (!inodes) {
1432         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1433                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1434       } else {
1435         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1436                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1437       }
1438       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1439       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1440       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1441       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1442       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1443       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1444       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1445       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1446       PetscFunctionReturn(0);
1447     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1448       PetscInt inodecount,inodelimit,*inodes;
1449       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1450       if (inodes) {
1451         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1454       }
1455       PetscFunctionReturn(0);
1456     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1457       PetscFunctionReturn(0);
1458     }
1459   } else if (isbinary) {
1460     if (size == 1) {
1461       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1462       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1463     } else {
1464       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1465     }
1466     PetscFunctionReturn(0);
1467   } else if (iascii && size == 1) {
1468     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1469     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1470     PetscFunctionReturn(0);
1471   } else if (isdraw) {
1472     PetscDraw draw;
1473     PetscBool isnull;
1474     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1475     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1476     if (isnull) PetscFunctionReturn(0);
1477   }
1478 
1479   { /* assemble the entire matrix onto first processor */
1480     Mat A = NULL, Av;
1481     IS  isrow,iscol;
1482 
1483     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1484     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1485     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1486     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1487 /*  The commented code uses MatCreateSubMatrices instead */
1488 /*
1489     Mat *AA, A = NULL, Av;
1490     IS  isrow,iscol;
1491 
1492     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1493     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1494     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1495     if (!rank) {
1496        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1497        A    = AA[0];
1498        Av   = AA[0];
1499     }
1500     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1501 */
1502     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1503     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1504     /*
1505        Everyone has to call to draw the matrix since the graphics waits are
1506        synchronized across all processors that share the PetscDraw object
1507     */
1508     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1509     if (!rank) {
1510       if (((PetscObject)mat)->name) {
1511         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1512       }
1513       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1514     }
1515     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1516     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1517     ierr = MatDestroy(&A);CHKERRQ(ierr);
1518   }
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1523 {
1524   PetscErrorCode ierr;
1525   PetscBool      iascii,isdraw,issocket,isbinary;
1526 
1527   PetscFunctionBegin;
1528   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1529   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1530   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1531   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1532   if (iascii || isdraw || isbinary || issocket) {
1533     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1539 {
1540   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1541   PetscErrorCode ierr;
1542   Vec            bb1 = NULL;
1543   PetscBool      hasop;
1544 
1545   PetscFunctionBegin;
1546   if (flag == SOR_APPLY_UPPER) {
1547     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1548     PetscFunctionReturn(0);
1549   }
1550 
1551   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1552     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1553   }
1554 
1555   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1556     if (flag & SOR_ZERO_INITIAL_GUESS) {
1557       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1558       its--;
1559     }
1560 
1561     while (its--) {
1562       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1564 
1565       /* update rhs: bb1 = bb - B*x */
1566       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1567       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1568 
1569       /* local sweep */
1570       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1571     }
1572   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1573     if (flag & SOR_ZERO_INITIAL_GUESS) {
1574       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1575       its--;
1576     }
1577     while (its--) {
1578       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580 
1581       /* update rhs: bb1 = bb - B*x */
1582       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1583       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1584 
1585       /* local sweep */
1586       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1587     }
1588   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1589     if (flag & SOR_ZERO_INITIAL_GUESS) {
1590       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1591       its--;
1592     }
1593     while (its--) {
1594       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1595       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1596 
1597       /* update rhs: bb1 = bb - B*x */
1598       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1599       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1600 
1601       /* local sweep */
1602       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1603     }
1604   } else if (flag & SOR_EISENSTAT) {
1605     Vec xx1;
1606 
1607     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1608     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1609 
1610     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1611     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612     if (!mat->diag) {
1613       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1614       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1615     }
1616     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1617     if (hasop) {
1618       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1619     } else {
1620       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1621     }
1622     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1623 
1624     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1625 
1626     /* local sweep */
1627     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1628     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1629     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1630   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1631 
1632   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1633 
1634   matin->factorerrortype = mat->A->factorerrortype;
1635   PetscFunctionReturn(0);
1636 }
1637 
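/*
   Editor's usage sketch (not compiled as part of this file): one application of the
   block Jacobi / local SOR smoother implemented above, called directly.  A is assumed to
   be an assembled MATMPIAIJ matrix, b and x vectors with its row layout; omega = 1.0
   gives local Gauss-Seidel sweeps.  True parallel SOR is not supported, as the final
   SETERRQ above indicates.

      ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/
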
1638 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1639 {
1640   Mat            aA,aB,Aperm;
1641   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1642   PetscScalar    *aa,*ba;
1643   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1644   PetscSF        rowsf,sf;
1645   IS             parcolp = NULL;
1646   PetscBool      done;
1647   PetscErrorCode ierr;
1648 
1649   PetscFunctionBegin;
1650   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1651   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1652   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1653   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1654 
1655   /* Invert row permutation to find out where my rows should go */
1656   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1657   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1658   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1659   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1660   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1661   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1662 
1663   /* Invert column permutation to find out where my columns should go */
1664   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1665   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1666   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1667   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1668   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1669   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1670   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1671 
1672   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1673   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1674   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1675 
1676   /* Find out where my gcols should go */
1677   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1678   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1679   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1680   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1681   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1682   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1683   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1684   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1685 
1686   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1687   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1688   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1689   for (i=0; i<m; i++) {
1690     PetscInt    row = rdest[i];
1691     PetscMPIInt rowner;
1692     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1693     for (j=ai[i]; j<ai[i+1]; j++) {
1694       PetscInt    col = cdest[aj[j]];
1695       PetscMPIInt cowner;
1696       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1697       if (rowner == cowner) dnnz[i]++;
1698       else onnz[i]++;
1699     }
1700     for (j=bi[i]; j<bi[i+1]; j++) {
1701       PetscInt    col = gcdest[bj[j]];
1702       PetscMPIInt cowner;
1703       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1704       if (rowner == cowner) dnnz[i]++;
1705       else onnz[i]++;
1706     }
1707   }
1708   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1709   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1710   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1711   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1712   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1713 
1714   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1715   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1716   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1717   for (i=0; i<m; i++) {
1718     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1719     PetscInt j0,rowlen;
1720     rowlen = ai[i+1] - ai[i];
1721     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1722       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1723       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1724     }
1725     rowlen = bi[i+1] - bi[i];
1726     for (j0=j=0; j<rowlen; j0=j) {
1727       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1728       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1729     }
1730   }
1731   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1732   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1733   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1734   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1735   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1736   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1737   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1738   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1739   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1740   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1741   *B = Aperm;
1742   PetscFunctionReturn(0);
1743 }
1744 
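/*
   Editor's usage sketch (not compiled as part of this file): calling MatPermute() with an
   identity permutation, mainly to show how the index sets are distributed.  Each process
   supplies its locally owned slice of the row and column permutations; a real reordering
   would build the index sets with ISCreateGeneral() holding the permuted global indices.

      IS             rowp,colp;
      Mat            Aperm;
      PetscInt       rstart,rend,cstart,cend;
      PetscErrorCode ierr;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
      ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
      ierr = ISDestroy(&rowp);CHKERRQ(ierr);
      ierr = ISDestroy(&colp);CHKERRQ(ierr);
      ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/
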
1745 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1746 {
1747   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1752   if (ghosts) *ghosts = aij->garray;
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1757 {
1758   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1759   Mat            A    = mat->A,B = mat->B;
1760   PetscErrorCode ierr;
1761   PetscLogDouble isend[5],irecv[5];
1762 
1763   PetscFunctionBegin;
1764   info->block_size = 1.0;
1765   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1766 
1767   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1768   isend[3] = info->memory;  isend[4] = info->mallocs;
1769 
1770   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1771 
1772   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1773   isend[3] += info->memory;  isend[4] += info->mallocs;
1774   if (flag == MAT_LOCAL) {
1775     info->nz_used      = isend[0];
1776     info->nz_allocated = isend[1];
1777     info->nz_unneeded  = isend[2];
1778     info->memory       = isend[3];
1779     info->mallocs      = isend[4];
1780   } else if (flag == MAT_GLOBAL_MAX) {
1781     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1782 
1783     info->nz_used      = irecv[0];
1784     info->nz_allocated = irecv[1];
1785     info->nz_unneeded  = irecv[2];
1786     info->memory       = irecv[3];
1787     info->mallocs      = irecv[4];
1788   } else if (flag == MAT_GLOBAL_SUM) {
1789     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1790 
1791     info->nz_used      = irecv[0];
1792     info->nz_allocated = irecv[1];
1793     info->nz_unneeded  = irecv[2];
1794     info->memory       = irecv[3];
1795     info->mallocs      = irecv[4];
1796   }
1797   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1798   info->fill_ratio_needed = 0;
1799   info->factor_mallocs    = 0;
1800   PetscFunctionReturn(0);
1801 }
1802 
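/*
   Editor's usage sketch (not compiled as part of this file): querying global nonzero and
   memory statistics, which for MATMPIAIJ ends up in the routine above.  MAT_LOCAL and
   MAT_GLOBAL_MAX are handled analogously.

      MatInfo        info;
      PetscErrorCode ierr;

      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g, allocated %g, mallocs %g\n",
                         info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
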
1803 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1804 {
1805   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1806   PetscErrorCode ierr;
1807 
1808   PetscFunctionBegin;
1809   switch (op) {
1810   case MAT_NEW_NONZERO_LOCATIONS:
1811   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1812   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1813   case MAT_KEEP_NONZERO_PATTERN:
1814   case MAT_NEW_NONZERO_LOCATION_ERR:
1815   case MAT_USE_INODES:
1816   case MAT_IGNORE_ZERO_ENTRIES:
1817     MatCheckPreallocated(A,1);
1818     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1819     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_ROW_ORIENTED:
1822     MatCheckPreallocated(A,1);
1823     a->roworiented = flg;
1824 
1825     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1827     break;
1828   case MAT_NEW_DIAGONALS:
1829   case MAT_SORTED_FULL:
1830     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1831     break;
1832   case MAT_IGNORE_OFF_PROC_ENTRIES:
1833     a->donotstash = flg;
1834     break;
1835   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1836   case MAT_SPD:
1837   case MAT_SYMMETRIC:
1838   case MAT_STRUCTURALLY_SYMMETRIC:
1839   case MAT_HERMITIAN:
1840   case MAT_SYMMETRY_ETERNAL:
1841     break;
1842   case MAT_SUBMAT_SINGLEIS:
1843     A->submat_singleis = flg;
1844     break;
1845   case MAT_STRUCTURE_ONLY:
1846     /* The option is handled directly by MatSetOption() */
1847     break;
1848   default:
1849     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1850   }
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1855 {
1856   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1857   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1858   PetscErrorCode ierr;
1859   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1860   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1861   PetscInt       *cmap,*idx_p;
1862 
1863   PetscFunctionBegin;
1864   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1865   mat->getrowactive = PETSC_TRUE;
1866 
1867   if (!mat->rowvalues && (idx || v)) {
1868     /*
1869         allocate enough space to hold information from the longest row.
1870     */
1871     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1872     PetscInt   max = 1,tmp;
1873     for (i=0; i<matin->rmap->n; i++) {
1874       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1875       if (max < tmp) max = tmp;
1876     }
1877     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1878   }
1879 
1880   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1881   lrow = row - rstart;
1882 
1883   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1884   if (!v)   {pvA = NULL; pvB = NULL;}
1885   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1886   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1887   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1888   nztot = nzA + nzB;
1889 
1890   cmap = mat->garray;
1891   if (v  || idx) {
1892     if (nztot) {
1893       /* Sort by increasing column numbers, assuming A and B already sorted */
1894       PetscInt imark = -1;
1895       if (v) {
1896         *v = v_p = mat->rowvalues;
1897         for (i=0; i<nzB; i++) {
1898           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1899           else break;
1900         }
1901         imark = i;
1902         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1903         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1904       }
1905       if (idx) {
1906         *idx = idx_p = mat->rowindices;
1907         if (imark > -1) {
1908           for (i=0; i<imark; i++) {
1909             idx_p[i] = cmap[cworkB[i]];
1910           }
1911         } else {
1912           for (i=0; i<nzB; i++) {
1913             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1914             else break;
1915           }
1916           imark = i;
1917         }
1918         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1919         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1920       }
1921     } else {
1922       if (idx) *idx = NULL;
1923       if (v)   *v   = NULL;
1924     }
1925   }
1926   *nz  = nztot;
1927   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1928   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1933 {
1934   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1935 
1936   PetscFunctionBegin;
1937   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1938   aij->getrowactive = PETSC_FALSE;
1939   PetscFunctionReturn(0);
1940 }
1941 
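/*
   Editor's usage sketch (not compiled as part of this file): the get/restore pair above is
   used by iterating over the locally owned rows only (off-process rows raise an error),
   and each MatGetRow() must be matched by MatRestoreRow() before requesting another row.

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      PetscErrorCode    ierr;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... use ncols, cols[], vals[] ...
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/
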
1942 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1943 {
1944   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1945   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1946   PetscErrorCode ierr;
1947   PetscInt       i,j,cstart = mat->cmap->rstart;
1948   PetscReal      sum = 0.0;
1949   MatScalar      *v;
1950 
1951   PetscFunctionBegin;
1952   if (aij->size == 1) {
1953     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1954   } else {
1955     if (type == NORM_FROBENIUS) {
1956       v = amat->a;
1957       for (i=0; i<amat->nz; i++) {
1958         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1959       }
1960       v = bmat->a;
1961       for (i=0; i<bmat->nz; i++) {
1962         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963       }
1964       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1965       *norm = PetscSqrtReal(*norm);
1966       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1967     } else if (type == NORM_1) { /* max column norm */
1968       PetscReal *tmp,*tmp2;
1969       PetscInt  *jj,*garray = aij->garray;
1970       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1971       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1972       *norm = 0.0;
1973       v     = amat->a; jj = amat->j;
1974       for (j=0; j<amat->nz; j++) {
1975         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1976       }
1977       v = bmat->a; jj = bmat->j;
1978       for (j=0; j<bmat->nz; j++) {
1979         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1980       }
1981       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1982       for (j=0; j<mat->cmap->N; j++) {
1983         if (tmp2[j] > *norm) *norm = tmp2[j];
1984       }
1985       ierr = PetscFree(tmp);CHKERRQ(ierr);
1986       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1987       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1988     } else if (type == NORM_INFINITY) { /* max row norm */
1989       PetscReal ntemp = 0.0;
1990       for (j=0; j<aij->A->rmap->n; j++) {
1991         v   = amat->a + amat->i[j];
1992         sum = 0.0;
1993         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1994           sum += PetscAbsScalar(*v); v++;
1995         }
1996         v = bmat->a + bmat->i[j];
1997         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1998           sum += PetscAbsScalar(*v); v++;
1999         }
2000         if (sum > ntemp) ntemp = sum;
2001       }
2002       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2003       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2004     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2005   }
2006   PetscFunctionReturn(0);
2007 }
2008 
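/*
   Editor's usage sketch (not compiled as part of this file): the three norms supported by
   the routine above.  NORM_FROBENIUS is sqrt(sum_ij |a_ij|^2), NORM_1 the maximum column
   sum of absolute values, NORM_INFINITY the maximum row sum; NORM_2 is not available for
   AIJ matrices.

      PetscReal      nrm;
      PetscErrorCode ierr;

      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
      ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);
      ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/
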
2009 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2010 {
2011   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
2012   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2013   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2014   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
2015   PetscErrorCode  ierr;
2016   Mat             B,A_diag,*B_diag;
2017   const MatScalar *array;
2018 
2019   PetscFunctionBegin;
2020   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2021   ai = Aloc->i; aj = Aloc->j;
2022   bi = Bloc->i; bj = Bloc->j;
2023   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2024     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2025     PetscSFNode          *oloc;
2026     PETSC_UNUSED PetscSF sf;
2027 
2028     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2029     /* compute d_nnz for preallocation */
2030     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2031     for (i=0; i<ai[ma]; i++) {
2032       d_nnz[aj[i]]++;
2033     }
2034     /* compute local off-diagonal contributions */
2035     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2036     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2037     /* map those to global */
2038     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2039     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2040     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2041     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2042     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2043     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2044     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2045 
2046     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2047     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2048     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2049     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2050     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2051     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2052   } else {
2053     B    = *matout;
2054     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2055   }
2056 
2057   b           = (Mat_MPIAIJ*)B->data;
2058   A_diag      = a->A;
2059   B_diag      = &b->A;
2060   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
2061   A_diag_ncol = A_diag->cmap->N;
2062   B_diag_ilen = sub_B_diag->ilen;
2063   B_diag_i    = sub_B_diag->i;
2064 
2065   /* Set ilen for diagonal of B */
2066   for (i=0; i<A_diag_ncol; i++) {
2067     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2068   }
2069 
2070   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
2071      very quickly (i.e., without using MatSetValues()), because all writes are local. */
2072   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2073 
2074   /* copy over the B part */
2075   ierr  = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2076   array = Bloc->a;
2077   row   = A->rmap->rstart;
2078   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2079   cols_tmp = cols;
2080   for (i=0; i<mb; i++) {
2081     ncol = bi[i+1]-bi[i];
2082     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2083     row++;
2084     array += ncol; cols_tmp += ncol;
2085   }
2086   ierr = PetscFree(cols);CHKERRQ(ierr);
2087 
2088   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2090   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2091     *matout = B;
2092   } else {
2093     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2094   }
2095   PetscFunctionReturn(0);
2096 }
2097 
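/*
   Editor's usage sketch (not compiled as part of this file): explicit transposes through
   the public interface.  The MAT_REUSE_MATRIX call assumes At was created by an earlier
   MAT_INITIAL_MATRIX call on a matrix whose nonzero structure has not changed since.

      Mat            At;
      PetscErrorCode ierr;

      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ... update the numerical values of A, keeping its nonzero pattern ...
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatDestroy(&At);CHKERRQ(ierr);
*/
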
2098 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2099 {
2100   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2101   Mat            a    = aij->A,b = aij->B;
2102   PetscErrorCode ierr;
2103   PetscInt       s1,s2,s3;
2104 
2105   PetscFunctionBegin;
2106   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2107   if (rr) {
2108     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2109     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2110     /* Overlap communication with computation. */
2111     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2112   }
2113   if (ll) {
2114     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2115     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2116     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2117   }
2118   /* scale  the diagonal block */
2119   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2120 
2121   if (rr) {
2122     /* Do a scatter end and then right scale the off-diagonal block */
2123     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2124     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2125   }
2126   PetscFunctionReturn(0);
2127 }
2128 
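/*
   Editor's usage sketch (not compiled as part of this file): forming diag(l)*A*diag(r)
   with the routine above.  l must conform to the row layout of A and r to its column
   layout, which is what MatCreateVecs() produces; passing NULL for either vector skips
   that side of the scaling.

      Vec            l,r;
      PetscErrorCode ierr;

      ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
      ierr = VecSet(l,2.0);CHKERRQ(ierr);
      ierr = VecSet(r,0.5);CHKERRQ(ierr);
      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
      ierr = VecDestroy(&l);CHKERRQ(ierr);
      ierr = VecDestroy(&r);CHKERRQ(ierr);
*/
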
2129 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2130 {
2131   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2132   PetscErrorCode ierr;
2133 
2134   PetscFunctionBegin;
2135   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2140 {
2141   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2142   Mat            a,b,c,d;
2143   PetscBool      flg;
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   a = matA->A; b = matA->B;
2148   c = matB->A; d = matB->B;
2149 
2150   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2151   if (flg) {
2152     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2153   }
2154   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2159 {
2160   PetscErrorCode ierr;
2161   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2162   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2163 
2164   PetscFunctionBegin;
2165   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2166   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2167     /* because of the column compression in the off-processor part of the matrix a->B,
2168        the number of columns in a->B and b->B may be different, hence we cannot call
2169        the MatCopy() directly on the two parts. If need be, we can provide a more
2170        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2171        then copying the submatrices */
2172     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2173   } else {
2174     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2175     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2176   }
2177   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2182 {
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 /*
2191    Computes the number of nonzeros per row needed for preallocation when X and Y
2192    have different nonzero structure.
2193 */
2194 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2195 {
2196   PetscInt       i,j,k,nzx,nzy;
2197 
2198   PetscFunctionBegin;
2199   /* Set the number of nonzeros in the new matrix */
2200   for (i=0; i<m; i++) {
2201     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2202     nzx = xi[i+1] - xi[i];
2203     nzy = yi[i+1] - yi[i];
2204     nnz[i] = 0;
2205     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2206       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2207       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2208       nnz[i]++;
2209     }
2210     for (; k<nzy; k++) nnz[i]++;
2211   }
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2216 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2217 {
2218   PetscErrorCode ierr;
2219   PetscInt       m = Y->rmap->N;
2220   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2221   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2222 
2223   PetscFunctionBegin;
2224   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2229 {
2230   PetscErrorCode ierr;
2231   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2232   PetscBLASInt   bnz,one=1;
2233   Mat_SeqAIJ     *x,*y;
2234 
2235   PetscFunctionBegin;
2236   if (str == SAME_NONZERO_PATTERN) {
2237     PetscScalar alpha = a;
2238     x    = (Mat_SeqAIJ*)xx->A->data;
2239     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2240     y    = (Mat_SeqAIJ*)yy->A->data;
2241     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2242     x    = (Mat_SeqAIJ*)xx->B->data;
2243     y    = (Mat_SeqAIJ*)yy->B->data;
2244     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2245     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2246     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2247     /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
2248        will be updated */
2249 #if defined(PETSC_HAVE_DEVICE)
2250     if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2251       Y->offloadmask = PETSC_OFFLOAD_CPU;
2252     }
2253 #endif
2254   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2255     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2256   } else {
2257     Mat      B;
2258     PetscInt *nnz_d,*nnz_o;
2259     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2260     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2261     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2262     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2263     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2264     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2265     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2266     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2267     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2268     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2269     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2270     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2271     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2272   }
2273   PetscFunctionReturn(0);
2274 }
2275 
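/*
   Editor's usage sketch (not compiled as part of this file): Y = Y + a*X through the
   public interface.  SAME_NONZERO_PATTERN takes the fast BLAS axpy path above,
   SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(), and DIFFERENT_NONZERO_PATTERN
   rebuilds Y with fresh preallocation.

      PetscErrorCode ierr;

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
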
2276 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2277 
2278 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2279 {
2280 #if defined(PETSC_USE_COMPLEX)
2281   PetscErrorCode ierr;
2282   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2283 
2284   PetscFunctionBegin;
2285   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2286   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2287 #else
2288   PetscFunctionBegin;
2289 #endif
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2300   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2305 {
2306   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2311   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2316 {
2317   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2318   PetscErrorCode    ierr;
2319   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2320   PetscScalar       *va,*vv;
2321   Vec               vB,vA;
2322   const PetscScalar *vb;
2323 
2324   PetscFunctionBegin;
2325   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2326   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2327 
2328   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2329   if (idx) {
2330     for (i=0; i<m; i++) {
2331       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2332     }
2333   }
2334 
2335   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2336   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2337   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2338 
2339   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2340   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2341   for (i=0; i<m; i++) {
2342     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2343       vv[i] = vb[i];
2344       if (idx) idx[i] = a->garray[idxb[i]];
2345     } else {
2346       vv[i] = va[i];
2347       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2348         idx[i] = a->garray[idxb[i]];
2349     }
2350   }
2351   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2352   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2353   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2354   ierr = PetscFree(idxb);CHKERRQ(ierr);
2355   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2356   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2361 {
2362   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2363   PetscInt       m = A->rmap->n,n = A->cmap->n;
2364   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2365   PetscInt       *cmap  = mat->garray;
2366   PetscInt       *diagIdx, *offdiagIdx;
2367   Vec            diagV, offdiagV;
2368   PetscScalar    *a, *diagA, *offdiagA, *ba;
2369   PetscInt       r,j,col,ncols,*bi,*bj;
2370   PetscErrorCode ierr;
2371   Mat            B = mat->B;
2372   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2373 
2374   PetscFunctionBegin;
2375   /* When a process holds entire A and other processes have no entry */
2376   if (A->cmap->N == n) {
2377     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2378     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2379     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2380     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2381     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2382     PetscFunctionReturn(0);
2383   } else if (n == 0) {
2384     if (m) {
2385       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2386       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2387       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2388     }
2389     PetscFunctionReturn(0);
2390   }
2391 
2392   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2393   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2394   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2395   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2396 
2397   /* Get offdiagIdx[] for implicit 0.0 */
2398   ba = b->a;
2399   bi = b->i;
2400   bj = b->j;
2401   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2402   for (r = 0; r < m; r++) {
2403     ncols = bi[r+1] - bi[r];
2404     if (ncols == A->cmap->N - n) { /* Brow is dense */
2405       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2406     } else { /* this row of B is sparse, so it has an implicit 0.0, which is the smallest possible absolute value in the off-diagonal part */
2407       offdiagA[r] = 0.0;
2408 
2409       /* Find first hole in the cmap */
2410       for (j=0; j<ncols; j++) {
2411         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2412         if (col > j && j < cstart) {
2413           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2414           break;
2415         } else if (col > j + n && j >= cstart) {
2416           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2417           break;
2418         }
2419       }
2420       if (j == ncols && ncols < A->cmap->N - n) {
2421         /* a hole is outside compressed Bcols */
2422         if (ncols == 0) {
2423           if (cstart) {
2424             offdiagIdx[r] = 0;
2425           } else offdiagIdx[r] = cend;
2426         } else { /* ncols > 0 */
2427           offdiagIdx[r] = cmap[ncols-1] + 1;
2428           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2429         }
2430       }
2431     }
2432 
2433     for (j=0; j<ncols; j++) {
2434       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2435       ba++; bj++;
2436     }
2437   }
2438 
2439   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2440   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2441   for (r = 0; r < m; ++r) {
2442     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2443       a[r]   = diagA[r];
2444       if (idx) idx[r] = cstart + diagIdx[r];
2445     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2446       a[r] = diagA[r];
2447       if (idx) {
2448         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2449           idx[r] = cstart + diagIdx[r];
2450         } else idx[r] = offdiagIdx[r];
2451       }
2452     } else {
2453       a[r]   = offdiagA[r];
2454       if (idx) idx[r] = offdiagIdx[r];
2455     }
2456   }
2457   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2458   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2459   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2460   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2461   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2462   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2467 {
2468   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2469   PetscInt       m = A->rmap->n,n = A->cmap->n;
2470   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2471   PetscInt       *cmap  = mat->garray;
2472   PetscInt       *diagIdx, *offdiagIdx;
2473   Vec            diagV, offdiagV;
2474   PetscScalar    *a, *diagA, *offdiagA, *ba;
2475   PetscInt       r,j,col,ncols,*bi,*bj;
2476   PetscErrorCode ierr;
2477   Mat            B = mat->B;
2478   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2479 
2480   PetscFunctionBegin;
2481   /* When one process holds the entire A and the other processes have no entries */
2482   if (A->cmap->N == n) {
2483     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2484     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2485     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2486     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2487     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2488     PetscFunctionReturn(0);
2489   } else if (n == 0) {
2490     if (m) {
2491       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2492       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2493       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2494     }
2495     PetscFunctionReturn(0);
2496   }
2497 
2498   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2499   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2500   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2501   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2502 
2503   /* Get offdiagIdx[] for implicit 0.0 */
2504   ba = b->a;
2505   bi = b->i;
2506   bj = b->j;
2507   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for (r = 0; r < m; r++) {
2509     ncols = bi[r+1] - bi[r];
2510     if (ncols == A->cmap->N - n) { /* Brow is dense */
2511       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2512     } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower */
2513       offdiagA[r] = 0.0;
2514 
2515       /* Find first hole in the cmap */
2516       for (j=0; j<ncols; j++) {
2517         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2518         if (col > j && j < cstart) {
2519           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2520           break;
2521         } else if (col > j + n && j >= cstart) {
2522           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2523           break;
2524         }
2525       }
2526       if (j == ncols && ncols < A->cmap->N - n) {
2527         /* a hole is outside compressed Bcols */
2528         if (ncols == 0) {
2529           if (cstart) {
2530             offdiagIdx[r] = 0;
2531           } else offdiagIdx[r] = cend;
2532         } else { /* ncols > 0 */
2533           offdiagIdx[r] = cmap[ncols-1] + 1;
2534           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2535         }
2536       }
2537     }
2538 
2539     for (j=0; j<ncols; j++) {
2540       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2541       ba++; bj++;
2542     }
2543   }
2544 
2545   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2546   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2547   for (r = 0; r < m; ++r) {
2548     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2549       a[r]   = diagA[r];
2550       if (idx) idx[r] = cstart + diagIdx[r];
2551     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2552       a[r] = diagA[r];
2553       if (idx) {
2554         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2555           idx[r] = cstart + diagIdx[r];
2556         } else idx[r] = offdiagIdx[r];
2557       }
2558     } else {
2559       a[r]   = offdiagA[r];
2560       if (idx) idx[r] = offdiagIdx[r];
2561     }
2562   }
2563   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2564   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2565   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2566   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2567   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2568   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2569   PetscFunctionReturn(0);
2570 }
2571 
2572 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2573 {
2574   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*)A->data;
2575   PetscInt       m = A->rmap->n,n = A->cmap->n;
2576   PetscInt       cstart = A->cmap->rstart,cend = A->cmap->rend;
2577   PetscInt       *cmap  = mat->garray;
2578   PetscInt       *diagIdx, *offdiagIdx;
2579   Vec            diagV, offdiagV;
2580   PetscScalar    *a, *diagA, *offdiagA, *ba;
2581   PetscInt       r,j,col,ncols,*bi,*bj;
2582   PetscErrorCode ierr;
2583   Mat            B = mat->B;
2584   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
2585 
2586   PetscFunctionBegin;
2587   /* When one process holds the entire A and the other processes have no entries */
2588   if (A->cmap->N == n) {
2589     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2590     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2591     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2592     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2593     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2594     PetscFunctionReturn(0);
2595   } else if (n == 0) {
2596     if (m) {
2597       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2598       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2599       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2600     }
2601     PetscFunctionReturn(0);
2602   }
2603 
2604   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2605   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2606   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2607   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2608 
2609   /* Get offdiagIdx[] for implicit 0.0 */
2610   ba = b->a;
2611   bi = b->i;
2612   bj = b->j;
2613   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2614   for (r = 0; r < m; r++) {
2615     ncols = bi[r+1] - bi[r];
2616     if (ncols == A->cmap->N - n) { /* Brow is dense */
2617       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2618     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2619       offdiagA[r] = 0.0;
2620 
2621       /* Find first hole in the cmap */
2622       for (j=0; j<ncols; j++) {
2623         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2624         if (col > j && j < cstart) {
2625           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2626           break;
2627         } else if (col > j + n && j >= cstart) {
2628           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2629           break;
2630         }
2631       }
2632       if (j == ncols && ncols < A->cmap->N - n) {
2633         /* a hole is outside compressed Bcols */
2634         if (ncols == 0) {
2635           if (cstart) {
2636             offdiagIdx[r] = 0;
2637           } else offdiagIdx[r] = cend;
2638         } else { /* ncols > 0 */
2639           offdiagIdx[r] = cmap[ncols-1] + 1;
2640           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2641         }
2642       }
2643     }
2644 
2645     for (j=0; j<ncols; j++) {
2646       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2647       ba++; bj++;
2648     }
2649   }
2650 
2651   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2652   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2653   for (r = 0; r < m; ++r) {
2654     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2655       a[r] = diagA[r];
2656       if (idx) idx[r] = cstart + diagIdx[r];
2657     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2658       a[r] = diagA[r];
2659       if (idx) {
2660         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2661           idx[r] = cstart + diagIdx[r];
2662         } else idx[r] = offdiagIdx[r];
2663       }
2664     } else {
2665       a[r] = offdiagA[r];
2666       if (idx) idx[r] = offdiagIdx[r];
2667     }
2668   }
2669   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2670   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2671   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2672   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2673   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2674   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2679 {
2680   PetscErrorCode ierr;
2681   Mat            *dummy;
2682 
2683   PetscFunctionBegin;
2684   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2685   *newmat = *dummy;
2686   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2691 {
2692   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2693   PetscErrorCode ierr;
2694 
2695   PetscFunctionBegin;
2696   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2697   A->factorerrortype = a->A->factorerrortype;
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2702 {
2703   PetscErrorCode ierr;
2704   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2705 
2706   PetscFunctionBegin;
2707   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2708   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2709   if (x->assembled) {
2710     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2711   } else {
2712     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2713   }
2714   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2715   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2720 {
2721   PetscFunctionBegin;
2722   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2723   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 /*@
2728    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2729 
2730    Collective on Mat
2731 
2732    Input Parameters:
2733 +    A - the matrix
2734 -    sc - PETSC_TRUE indicates the scalable algorithm should be used (the default is PETSC_FALSE, i.e., the non-scalable algorithm)
2735 
2736  Level: advanced
2737 
2738 @*/
2739 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2740 {
2741   PetscErrorCode       ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
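/*
   A minimal usage sketch, assuming a MATMPIAIJ matrix A created in the usual way; this is
   the programmatic equivalent of the -mat_increase_overlap_scalable option handled in
   MatSetFromOptions_MPIAIJ() below.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
*/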
2747 
2748 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2749 {
2750   PetscErrorCode       ierr;
2751   PetscBool            sc = PETSC_FALSE,flg;
2752 
2753   PetscFunctionBegin;
2754   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2755   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2756   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2757   if (flg) {
2758     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2759   }
2760   ierr = PetscOptionsTail();CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2765 {
2766   PetscErrorCode ierr;
2767   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2768   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2769 
2770   PetscFunctionBegin;
2771   if (!Y->preallocated) {
2772     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2773   } else if (!aij->nz) {
2774     PetscInt nonew = aij->nonew;
2775     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2776     aij->nonew = nonew;
2777   }
2778   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
2783 {
2784   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2785   PetscErrorCode ierr;
2786 
2787   PetscFunctionBegin;
2788   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2789   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2790   if (d) {
2791     PetscInt rstart;
2792     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2793     *d += rstart;
2794 
2795   }
2796   PetscFunctionReturn(0);
2797 }
2798 
2799 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2800 {
2801   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2802   PetscErrorCode ierr;
2803 
2804   PetscFunctionBegin;
2805   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 /* -------------------------------------------------------------------*/
2810 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2811                                        MatGetRow_MPIAIJ,
2812                                        MatRestoreRow_MPIAIJ,
2813                                        MatMult_MPIAIJ,
2814                                 /* 4*/ MatMultAdd_MPIAIJ,
2815                                        MatMultTranspose_MPIAIJ,
2816                                        MatMultTransposeAdd_MPIAIJ,
2817                                        NULL,
2818                                        NULL,
2819                                        NULL,
2820                                 /*10*/ NULL,
2821                                        NULL,
2822                                        NULL,
2823                                        MatSOR_MPIAIJ,
2824                                        MatTranspose_MPIAIJ,
2825                                 /*15*/ MatGetInfo_MPIAIJ,
2826                                        MatEqual_MPIAIJ,
2827                                        MatGetDiagonal_MPIAIJ,
2828                                        MatDiagonalScale_MPIAIJ,
2829                                        MatNorm_MPIAIJ,
2830                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2831                                        MatAssemblyEnd_MPIAIJ,
2832                                        MatSetOption_MPIAIJ,
2833                                        MatZeroEntries_MPIAIJ,
2834                                 /*24*/ MatZeroRows_MPIAIJ,
2835                                        NULL,
2836                                        NULL,
2837                                        NULL,
2838                                        NULL,
2839                                 /*29*/ MatSetUp_MPIAIJ,
2840                                        NULL,
2841                                        NULL,
2842                                        MatGetDiagonalBlock_MPIAIJ,
2843                                        NULL,
2844                                 /*34*/ MatDuplicate_MPIAIJ,
2845                                        NULL,
2846                                        NULL,
2847                                        NULL,
2848                                        NULL,
2849                                 /*39*/ MatAXPY_MPIAIJ,
2850                                        MatCreateSubMatrices_MPIAIJ,
2851                                        MatIncreaseOverlap_MPIAIJ,
2852                                        MatGetValues_MPIAIJ,
2853                                        MatCopy_MPIAIJ,
2854                                 /*44*/ MatGetRowMax_MPIAIJ,
2855                                        MatScale_MPIAIJ,
2856                                        MatShift_MPIAIJ,
2857                                        MatDiagonalSet_MPIAIJ,
2858                                        MatZeroRowsColumns_MPIAIJ,
2859                                 /*49*/ MatSetRandom_MPIAIJ,
2860                                        NULL,
2861                                        NULL,
2862                                        NULL,
2863                                        NULL,
2864                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2865                                        NULL,
2866                                        MatSetUnfactored_MPIAIJ,
2867                                        MatPermute_MPIAIJ,
2868                                        NULL,
2869                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2870                                        MatDestroy_MPIAIJ,
2871                                        MatView_MPIAIJ,
2872                                        NULL,
2873                                        NULL,
2874                                 /*64*/ NULL,
2875                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2876                                        NULL,
2877                                        NULL,
2878                                        NULL,
2879                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2880                                        MatGetRowMinAbs_MPIAIJ,
2881                                        NULL,
2882                                        NULL,
2883                                        NULL,
2884                                        NULL,
2885                                 /*75*/ MatFDColoringApply_AIJ,
2886                                        MatSetFromOptions_MPIAIJ,
2887                                        NULL,
2888                                        NULL,
2889                                        MatFindZeroDiagonals_MPIAIJ,
2890                                 /*80*/ NULL,
2891                                        NULL,
2892                                        NULL,
2893                                 /*83*/ MatLoad_MPIAIJ,
2894                                        MatIsSymmetric_MPIAIJ,
2895                                        NULL,
2896                                        NULL,
2897                                        NULL,
2898                                        NULL,
2899                                 /*89*/ NULL,
2900                                        NULL,
2901                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2902                                        NULL,
2903                                        NULL,
2904                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2905                                        NULL,
2906                                        NULL,
2907                                        NULL,
2908                                        MatBindToCPU_MPIAIJ,
2909                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2910                                        NULL,
2911                                        NULL,
2912                                        MatConjugate_MPIAIJ,
2913                                        NULL,
2914                                 /*104*/MatSetValuesRow_MPIAIJ,
2915                                        MatRealPart_MPIAIJ,
2916                                        MatImaginaryPart_MPIAIJ,
2917                                        NULL,
2918                                        NULL,
2919                                 /*109*/NULL,
2920                                        NULL,
2921                                        MatGetRowMin_MPIAIJ,
2922                                        NULL,
2923                                        MatMissingDiagonal_MPIAIJ,
2924                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2925                                        NULL,
2926                                        MatGetGhosts_MPIAIJ,
2927                                        NULL,
2928                                        NULL,
2929                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2930                                        NULL,
2931                                        NULL,
2932                                        NULL,
2933                                        MatGetMultiProcBlock_MPIAIJ,
2934                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2935                                        MatGetColumnNorms_MPIAIJ,
2936                                        MatInvertBlockDiagonal_MPIAIJ,
2937                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2938                                        MatCreateSubMatricesMPI_MPIAIJ,
2939                                 /*129*/NULL,
2940                                        NULL,
2941                                        NULL,
2942                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2943                                        NULL,
2944                                 /*134*/NULL,
2945                                        NULL,
2946                                        NULL,
2947                                        NULL,
2948                                        NULL,
2949                                 /*139*/MatSetBlockSizes_MPIAIJ,
2950                                        NULL,
2951                                        NULL,
2952                                        MatFDColoringSetUp_MPIXAIJ,
2953                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2954                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2955                                 /*145*/NULL,
2956                                        NULL,
2957                                        NULL
2958 };
2959 
2960 /* ----------------------------------------------------------------------------------------*/
2961 
2962 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2963 {
2964   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2965   PetscErrorCode ierr;
2966 
2967   PetscFunctionBegin;
2968   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2969   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2974 {
2975   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2976   PetscErrorCode ierr;
2977 
2978   PetscFunctionBegin;
2979   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2980   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
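/*
   A minimal store/retrieve sketch, assuming mat is an assembled MATMPIAIJ matrix whose
   nonzero pattern is to be reused; the usual pattern first disables new nonzero locations
   so the stored and retrieved values refer to the same structure.

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
        (modify the numerical values of mat here, e.g. within a nonlinear iteration)
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/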
2983 
2984 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2985 {
2986   Mat_MPIAIJ     *b;
2987   PetscErrorCode ierr;
2988   PetscMPIInt    size;
2989 
2990   PetscFunctionBegin;
2991   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2992   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2993   b = (Mat_MPIAIJ*)B->data;
2994 
2995 #if defined(PETSC_USE_CTABLE)
2996   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2997 #else
2998   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2999 #endif
3000   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3001   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3002   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3003 
3004   /* Because B will have been resized we simply destroy it and create a new one each time */
3005   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3006   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3007   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3008   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
3009   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3010   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3011   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3012 
3013   if (!B->preallocated) {
3014     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3015     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3016     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3017     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3018     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3019   }
3020 
3021   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3022   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3023   B->preallocated  = PETSC_TRUE;
3024   B->was_assembled = PETSC_FALSE;
3025   B->assembled     = PETSC_FALSE;
3026   PetscFunctionReturn(0);
3027 }
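/*
   A minimal preallocation sketch, assuming roughly 5 nonzeros per row in the diagonal
   block and 2 per row in the off-diagonal block (these counts and the global sizes M and N
   are placeholders; the right values are application dependent):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/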
3028 
3029 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
3030 {
3031   Mat_MPIAIJ     *b;
3032   PetscErrorCode ierr;
3033 
3034   PetscFunctionBegin;
3035   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3036   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3037   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3038   b = (Mat_MPIAIJ*)B->data;
3039 
3040 #if defined(PETSC_USE_CTABLE)
3041   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3042 #else
3043   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3044 #endif
3045   ierr = PetscFree(b->garray);CHKERRQ(ierr);
3046   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3047   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3048 
3049   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
3050   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
3051   B->preallocated  = PETSC_TRUE;
3052   B->was_assembled = PETSC_FALSE;
3053   B->assembled = PETSC_FALSE;
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3058 {
3059   Mat            mat;
3060   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3061   PetscErrorCode ierr;
3062 
3063   PetscFunctionBegin;
3064   *newmat = NULL;
3065   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3066   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3067   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3068   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3069   a       = (Mat_MPIAIJ*)mat->data;
3070 
3071   mat->factortype   = matin->factortype;
3072   mat->assembled    = matin->assembled;
3073   mat->insertmode   = NOT_SET_VALUES;
3074   mat->preallocated = matin->preallocated;
3075 
3076   a->size         = oldmat->size;
3077   a->rank         = oldmat->rank;
3078   a->donotstash   = oldmat->donotstash;
3079   a->roworiented  = oldmat->roworiented;
3080   a->rowindices   = NULL;
3081   a->rowvalues    = NULL;
3082   a->getrowactive = PETSC_FALSE;
3083 
3084   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3085   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3086 
3087   if (oldmat->colmap) {
3088 #if defined(PETSC_USE_CTABLE)
3089     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3090 #else
3091     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3092     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3093     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3094 #endif
3095   } else a->colmap = NULL;
3096   if (oldmat->garray) {
3097     PetscInt len;
3098     len  = oldmat->B->cmap->n;
3099     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3100     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3101     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3102   } else a->garray = NULL;
3103 
3104   /* MatDuplicate may be called with a non-assembled matrix; it only requires
3105      the matrix to be preallocated.  This can happen, for example, inside
3106      DMCreateMatrix_Shell */
3107   if (oldmat->lvec) {
3108     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3109     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3110   }
3111   if (oldmat->Mvctx) {
3112     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3113     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3114   }
3115   if (oldmat->Mvctx_mpi1) {
3116     ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
3117     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
3118   }
3119 
3120   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3121   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3122   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3123   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3124   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3125   *newmat = mat;
3126   PetscFunctionReturn(0);
3127 }
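/*
   A minimal duplication sketch, assuming A is a preallocated (not necessarily assembled,
   per the comment above) MATMPIAIJ matrix:

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/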
3128 
3129 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3130 {
3131   PetscBool      isbinary, ishdf5;
3132   PetscErrorCode ierr;
3133 
3134   PetscFunctionBegin;
3135   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3136   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3137   /* force binary viewer to load .info file if it has not yet done so */
3138   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3139   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3140   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
3141   if (isbinary) {
3142     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3143   } else if (ishdf5) {
3144 #if defined(PETSC_HAVE_HDF5)
3145     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3146 #else
3147     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3148 #endif
3149   } else {
3150     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3151   }
3152   PetscFunctionReturn(0);
3153 }
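/*
   A minimal load sketch, assuming "matrix.dat" (an illustrative file name) was previously
   written by MatView() on a binary viewer; MatLoad() dispatches to MatLoad_MPIAIJ_Binary()
   below for binary viewers.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/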
3154 
3155 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3156 {
3157   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3158   PetscInt       *rowidxs,*colidxs;
3159   PetscScalar    *matvals;
3160   PetscErrorCode ierr;
3161 
3162   PetscFunctionBegin;
3163   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3164 
3165   /* read in matrix header */
3166   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3167   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3168   M  = header[1]; N = header[2]; nz = header[3];
3169   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3170   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3171   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3172 
3173   /* set block sizes from the viewer's .info file */
3174   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3175   /* set global sizes if not set already */
3176   if (mat->rmap->N < 0) mat->rmap->N = M;
3177   if (mat->cmap->N < 0) mat->cmap->N = N;
3178   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3180 
3181   /* check if the matrix sizes are correct */
3182   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3183   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3184 
3185   /* read in row lengths and build row indices */
3186   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3187   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3188   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3189   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3190   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3191   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3192   /* read in column indices and matrix values */
3193   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3194   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3195   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3196   /* store matrix indices and values */
3197   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3198   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3199   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3200   PetscFunctionReturn(0);
3201 }
3202 
3203 /* Not scalable because of ISAllGather() unless getting all columns. */
3204 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3205 {
3206   PetscErrorCode ierr;
3207   IS             iscol_local;
3208   PetscBool      isstride;
3209   PetscMPIInt    lisstride=0,gisstride;
3210 
3211   PetscFunctionBegin;
3212   /* check if we are grabbing all columns */
3213   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3214 
3215   if (isstride) {
3216     PetscInt  start,len,mstart,mlen;
3217     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3218     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3219     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3220     if (mstart == start && mlen-mstart == len) lisstride = 1;
3221   }
3222 
3223   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3224   if (gisstride) {
3225     PetscInt N;
3226     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3227     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3228     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3229     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3230   } else {
3231     PetscInt cbs;
3232     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3233     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3234     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3235   }
3236 
3237   *isseq = iscol_local;
3238   PetscFunctionReturn(0);
3239 }
3240 
3241 /*
3242  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3243  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3244 
3245  Input Parameters:
3246    mat - matrix
3247    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3248            i.e., mat->rstart <= isrow[i] < mat->rend
3249    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3250            i.e., mat->cstart <= iscol[i] < mat->cend
3251  Output Parameters:
3252    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3253    iscol_o - sequential column index set for retrieving mat->B
3254    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3255  */
3256 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3257 {
3258   PetscErrorCode ierr;
3259   Vec            x,cmap;
3260   const PetscInt *is_idx;
3261   PetscScalar    *xarray,*cmaparray;
3262   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3263   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3264   Mat            B=a->B;
3265   Vec            lvec=a->lvec,lcmap;
3266   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3267   MPI_Comm       comm;
3268   VecScatter     Mvctx=a->Mvctx;
3269 
3270   PetscFunctionBegin;
3271   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3272   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3273 
3274   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3275   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3276   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3277   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3278   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3279 
3280   /* Get start indices */
3281   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3282   isstart -= ncols;
3283   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3284 
3285   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3286   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3287   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3288   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3289   for (i=0; i<ncols; i++) {
3290     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3291     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3292     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3293   }
3294   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3295   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3296   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3297 
3298   /* Get iscol_d */
3299   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3300   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3301   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3302 
3303   /* Get isrow_d */
3304   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3305   rstart = mat->rmap->rstart;
3306   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3307   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3308   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3309   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3310 
3311   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3312   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3313   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3314 
3315   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3316   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3317   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3318 
3319   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3320 
3321   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3322   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3323 
3324   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3325   /* off-process column indices */
3326   count = 0;
3327   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3328   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3329 
3330   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3331   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3332   for (i=0; i<Bn; i++) {
3333     if (PetscRealPart(xarray[i]) > -1.0) {
3334       idx[count]     = i;                   /* local column index in off-diagonal part B */
3335       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3336       count++;
3337     }
3338   }
3339   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3340   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3341 
3342   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3343   /* cannot ensure iscol_o has same blocksize as iscol! */
3344 
3345   ierr = PetscFree(idx);CHKERRQ(ierr);
3346   *garray = cmap1;
3347 
3348   ierr = VecDestroy(&x);CHKERRQ(ierr);
3349   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3350   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3351   PetscFunctionReturn(0);
3352 }
3353 
3354 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3355 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3356 {
3357   PetscErrorCode ierr;
3358   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3359   Mat            M = NULL;
3360   MPI_Comm       comm;
3361   IS             iscol_d,isrow_d,iscol_o;
3362   Mat            Asub = NULL,Bsub = NULL;
3363   PetscInt       n;
3364 
3365   PetscFunctionBegin;
3366   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3367 
3368   if (call == MAT_REUSE_MATRIX) {
3369     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3370     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3371     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3372 
3373     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3374     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3375 
3376     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3377     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3378 
3379     /* Update diagonal and off-diagonal portions of submat */
3380     asub = (Mat_MPIAIJ*)(*submat)->data;
3381     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3382     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3383     if (n) {
3384       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3385     }
3386     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3387     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3388 
3389   } else { /* call == MAT_INITIAL_MATRIX */
3390     const PetscInt *garray;
3391     PetscInt        BsubN;
3392 
3393     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3394     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3395 
3396     /* Create local submatrices Asub and Bsub */
3397     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3398     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3399 
3400     /* Create submatrix M */
3401     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3402 
3403     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3404     asub = (Mat_MPIAIJ*)M->data;
3405 
3406     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3407     n = asub->B->cmap->N;
3408     if (BsubN > n) {
3409       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3410       const PetscInt *idx;
3411       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3412       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3413 
3414       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3415       j = 0;
3416       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3417       for (i=0; i<n; i++) {
3418         if (j >= BsubN) break;
3419         while (subgarray[i] > garray[j]) j++;
3420 
3421         if (subgarray[i] == garray[j]) {
3422           idx_new[i] = idx[j++];
3423         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3424       }
3425       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3426 
3427       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3428       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3429 
3430     } else if (BsubN < n) {
3431       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than those of B (%D)",BsubN,asub->B->cmap->N);
3432     }
3433 
3434     ierr = PetscFree(garray);CHKERRQ(ierr);
3435     *submat = M;
3436 
3437     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3438     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3439     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3440 
3441     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3442     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3443 
3444     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3445     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3446   }
3447   PetscFunctionReturn(0);
3448 }
3449 
3450 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3451 {
3452   PetscErrorCode ierr;
3453   IS             iscol_local=NULL,isrow_d;
3454   PetscInt       csize;
3455   PetscInt       n,i,j,start,end;
3456   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3457   MPI_Comm       comm;
3458 
3459   PetscFunctionBegin;
3460   /* If isrow has same processor distribution as mat,
3461      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3462   if (call == MAT_REUSE_MATRIX) {
3463     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3464     if (isrow_d) {
3465       sameRowDist  = PETSC_TRUE;
3466       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3467     } else {
3468       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3469       if (iscol_local) {
3470         sameRowDist  = PETSC_TRUE;
3471         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3472       }
3473     }
3474   } else {
3475     /* Check if isrow has same processor distribution as mat */
3476     sameDist[0] = PETSC_FALSE;
3477     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3478     if (!n) {
3479       sameDist[0] = PETSC_TRUE;
3480     } else {
3481       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3482       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3483       if (i >= start && j < end) {
3484         sameDist[0] = PETSC_TRUE;
3485       }
3486     }
3487 
3488     /* Check if iscol has same processor distribution as mat */
3489     sameDist[1] = PETSC_FALSE;
3490     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3491     if (!n) {
3492       sameDist[1] = PETSC_TRUE;
3493     } else {
3494       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3495       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3496       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3497     }
3498 
3499     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3500     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3501     sameRowDist = tsameDist[0];
3502   }
3503 
3504   if (sameRowDist) {
3505     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3506       /* isrow and iscol have same processor distribution as mat */
3507       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3508       PetscFunctionReturn(0);
3509     } else { /* sameRowDist */
3510       /* isrow has same processor distribution as mat */
3511       if (call == MAT_INITIAL_MATRIX) {
3512         PetscBool sorted;
3513         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3514         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3515         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3516         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3517 
3518         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3519         if (sorted) {
3520           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3521           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3522           PetscFunctionReturn(0);
3523         }
3524       } else { /* call == MAT_REUSE_MATRIX */
3525         IS    iscol_sub;
3526         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3527         if (iscol_sub) {
3528           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3529           PetscFunctionReturn(0);
3530         }
3531       }
3532     }
3533   }
3534 
3535   /* General case: iscol -> iscol_local which has global size of iscol */
3536   if (call == MAT_REUSE_MATRIX) {
3537     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3538     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3539   } else {
3540     if (!iscol_local) {
3541       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3542     }
3543   }
3544 
3545   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3546   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3547 
3548   if (call == MAT_INITIAL_MATRIX) {
3549     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3550     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3551   }
3552   PetscFunctionReturn(0);
3553 }
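/*
   A minimal extraction sketch, assuming isrow and iscol are parallel index sets holding
   the desired global row and column indices; when their distribution matches mat's row
   and column ownership, the SameRowColDist/SameRowDist fast paths above are taken.

     Mat sub;
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/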
3554 
3555 /*@C
3556      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3557          and "off-diagonal" part of the matrix in CSR format.
3558 
3559    Collective
3560 
3561    Input Parameters:
3562 +  comm - MPI communicator
3563 .  A - "diagonal" portion of matrix
3564 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3565 -  garray - global index of B columns
3566 
3567    Output Parameter:
3568 .  mat - the matrix, with input A as its local diagonal matrix

3569    Level: advanced
3570 
3571    Notes:
3572        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3573        A becomes part of the output mat and B is destroyed by this routine; the user cannot use A or B afterwards.
3574 
3575 .seealso: MatCreateMPIAIJWithSplitArrays()
3576 @*/
3577 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3578 {
3579   PetscErrorCode ierr;
3580   Mat_MPIAIJ     *maij;
3581   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3582   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3583   PetscScalar    *oa=b->a;
3584   Mat            Bnew;
3585   PetscInt       m,n,N;
3586 
3587   PetscFunctionBegin;
3588   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3589   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3590   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3591   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3592   /* the check below is intentionally disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3593   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3594 
3595   /* Get global columns of mat */
3596   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3597 
3598   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3599   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3600   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3601   maij = (Mat_MPIAIJ*)(*mat)->data;
3602 
3603   (*mat)->preallocated = PETSC_TRUE;
3604 
3605   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3606   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3607 
3608   /* Set A as diagonal portion of *mat */
3609   maij->A = A;
3610 
3611   nz = oi[m];
3612   for (i=0; i<nz; i++) {
3613     col   = oj[i];
3614     oj[i] = garray[col];
3615   }
3616 
3617    /* Set Bnew as off-diagonal portion of *mat */
3618   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3619   bnew        = (Mat_SeqAIJ*)Bnew->data;
3620   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3621   maij->B     = Bnew;
3622 
3623   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3624 
3625   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3626   b->free_a       = PETSC_FALSE;
3627   b->free_ij      = PETSC_FALSE;
3628   ierr = MatDestroy(&B);CHKERRQ(ierr);
3629 
3630   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3631   bnew->free_a       = PETSC_TRUE;
3632   bnew->free_ij      = PETSC_TRUE;
3633 
3634   /* condense columns of maij->B */
3635   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3636   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3639   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3640   PetscFunctionReturn(0);
3641 }
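/*
   A minimal assembly sketch, assuming Asub and Bsub are SEQAIJ matrices holding the local
   diagonal and off-diagonal blocks and garray maps Bsub's columns to global column indices
   (for example as produced by ISGetSeqIS_SameColDist_Private() above); both Asub and Bsub
   are taken over by the new matrix M.

     Mat M;
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
*/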
3642 
3643 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3644 
3645 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3646 {
3647   PetscErrorCode ierr;
3648   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3649   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3650   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3651   Mat            M,Msub,B=a->B;
3652   MatScalar      *aa;
3653   Mat_SeqAIJ     *aij;
3654   PetscInt       *garray = a->garray,*colsub,Ncols;
3655   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3656   IS             iscol_sub,iscmap;
3657   const PetscInt *is_idx,*cmap;
3658   PetscBool      allcolumns=PETSC_FALSE;
3659   MPI_Comm       comm;
3660 
3661   PetscFunctionBegin;
3662   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3663 
3664   if (call == MAT_REUSE_MATRIX) {
3665     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3666     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3667     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3668 
3669     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3670     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3671 
3672     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3673     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3674 
3675     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3676 
3677   } else { /* call == MAT_INITIAL_MATRIX */
3678     PetscBool flg;
3679 
3680     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3681     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3682 
3683     /* (1) iscol -> nonscalable iscol_local */
3684     /* Check for special case: each processor gets entire matrix columns */
3685     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3686     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3687     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3688     if (allcolumns) {
3689       iscol_sub = iscol_local;
3690       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3691       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3692 
3693     } else {
3694       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3695       PetscInt *idx,*cmap1,k;
3696       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3697       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3698       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3699       count = 0;
3700       k     = 0;
3701       for (i=0; i<Ncols; i++) {
3702         j = is_idx[i];
3703         if (j >= cstart && j < cend) {
3704           /* diagonal part of mat */
3705           idx[count]     = j;
3706           cmap1[count++] = i; /* column index in submat */
3707         } else if (Bn) {
3708           /* off-diagonal part of mat */
3709           if (j == garray[k]) {
3710             idx[count]     = j;
3711             cmap1[count++] = i;  /* column index in submat */
3712           } else if (j > garray[k]) {
3713             while (j > garray[k] && k < Bn-1) k++;
3714             if (j == garray[k]) {
3715               idx[count]     = j;
3716               cmap1[count++] = i; /* column index in submat */
3717             }
3718           }
3719         }
3720       }
3721       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3722 
3723       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3724       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3725       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3726 
3727       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3728     }
3729 
3730     /* (3) Create sequential Msub */
3731     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3732   }
3733 
3734   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3735   aij  = (Mat_SeqAIJ*)(Msub)->data;
3736   ii   = aij->i;
3737   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3738 
3739   /*
3740       m - number of local rows
3741       Ncols - number of columns (same on all processors)
3742       rstart - first row in new global matrix generated
3743   */
3744   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3745 
3746   if (call == MAT_INITIAL_MATRIX) {
3747     /* (4) Create parallel newmat */
3748     PetscMPIInt    rank,size;
3749     PetscInt       csize;
3750 
3751     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3752     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3753 
3754     /*
3755         Determine the number of non-zeros in the diagonal and off-diagonal
3756         portions of the matrix in order to do correct preallocation
3757     */
3758 
3759     /* first get start and end of "diagonal" columns */
3760     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3761     if (csize == PETSC_DECIDE) {
3762       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3763       if (mglobal == Ncols) { /* square matrix */
3764         nlocal = m;
3765       } else {
3766         nlocal = Ncols/size + ((Ncols % size) > rank);
3767       }
3768     } else {
3769       nlocal = csize;
3770     }
3771     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3772     rstart = rend - nlocal;
3773     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3774 
3775     /* next, compute all the lengths */
3776     jj    = aij->j;
3777     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3778     olens = dlens + m;
3779     for (i=0; i<m; i++) {
3780       jend = ii[i+1] - ii[i];
3781       olen = 0;
3782       dlen = 0;
3783       for (j=0; j<jend; j++) {
3784         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3785         else dlen++;
3786         jj++;
3787       }
3788       olens[i] = olen;
3789       dlens[i] = dlen;
3790     }
3791 
3792     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3793     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3794 
3795     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3796     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3797     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3798     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3799     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3800     ierr = PetscFree(dlens);CHKERRQ(ierr);
3801 
3802   } else { /* call == MAT_REUSE_MATRIX */
3803     M    = *newmat;
3804     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3805     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3806     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3807     /*
3808          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3809        rather than the slower MatSetValues().
3810     */
3811     M->was_assembled = PETSC_TRUE;
3812     M->assembled     = PETSC_FALSE;
3813   }
3814 
3815   /* (5) Set values of Msub to *newmat */
3816   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3817   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3818 
3819   jj   = aij->j;
3820   aa   = aij->a;
3821   for (i=0; i<m; i++) {
3822     row = rstart + i;
3823     nz  = ii[i+1] - ii[i];
3824     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3825     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3826     jj += nz; aa += nz;
3827   }
3828   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3829 
3830   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3832 
3833   ierr = PetscFree(colsub);CHKERRQ(ierr);
3834 
3835   /* save Msub, iscol_sub and iscmap used in processor for next request */
3836   if (call ==  MAT_INITIAL_MATRIX) {
3837     *newmat = M;
3838     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3839     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3840 
3841     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3842     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3843 
3844     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3845     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3846 
3847     if (iscol_local) {
3848       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3849       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3850     }
3851   }
3852   PetscFunctionReturn(0);
3853 }
3854 
3855 /*
3856     Not great since it makes two copies of the submatrix: first a SeqAIJ
3857   locally, and then the end result by concatenating the local matrices.
3858   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3859 
3860   Note: This requires a sequential iscol with all indices.
3861 */
3862 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3863 {
3864   PetscErrorCode ierr;
3865   PetscMPIInt    rank,size;
3866   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3867   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3868   Mat            M,Mreuse;
3869   MatScalar      *aa,*vwork;
3870   MPI_Comm       comm;
3871   Mat_SeqAIJ     *aij;
3872   PetscBool      colflag,allcolumns=PETSC_FALSE;
3873 
3874   PetscFunctionBegin;
3875   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3876   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3877   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3878 
3879   /* Check for special case: each processor gets entire matrix columns */
3880   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3881   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3882   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3883   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3884 
3885   if (call ==  MAT_REUSE_MATRIX) {
3886     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3887     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3888     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3889   } else {
3890     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3891   }
3892 
3893   /*
3894       m - number of local rows
3895       n - number of columns (same on all processors)
3896       rstart - first row in new global matrix generated
3897   */
3898   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3899   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3900   if (call == MAT_INITIAL_MATRIX) {
3901     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3902     ii  = aij->i;
3903     jj  = aij->j;
3904 
3905     /*
3906         Determine the number of non-zeros in the diagonal and off-diagonal
3907         portions of the matrix in order to do correct preallocation
3908     */
3909 
3910     /* first get start and end of "diagonal" columns */
3911     if (csize == PETSC_DECIDE) {
3912       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3913       if (mglobal == n) { /* square matrix */
3914         nlocal = m;
3915       } else {
3916         nlocal = n/size + ((n % size) > rank);
3917       }
3918     } else {
3919       nlocal = csize;
3920     }
3921     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3922     rstart = rend - nlocal;
3923     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3924 
3925     /* next, compute all the lengths */
3926     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3927     olens = dlens + m;
3928     for (i=0; i<m; i++) {
3929       jend = ii[i+1] - ii[i];
3930       olen = 0;
3931       dlen = 0;
3932       for (j=0; j<jend; j++) {
3933         if (*jj < rstart || *jj >= rend) olen++;
3934         else dlen++;
3935         jj++;
3936       }
3937       olens[i] = olen;
3938       dlens[i] = dlen;
3939     }
3940     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3941     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3942     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3943     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3944     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3945     ierr = PetscFree(dlens);CHKERRQ(ierr);
3946   } else {
3947     PetscInt ml,nl;
3948 
3949     M    = *newmat;
3950     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3951     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3952     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3953     /*
3954          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3955        rather than the slower MatSetValues().
3956     */
3957     M->was_assembled = PETSC_TRUE;
3958     M->assembled     = PETSC_FALSE;
3959   }
3960   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3961   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3962   ii   = aij->i;
3963   jj   = aij->j;
3964   aa   = aij->a;
3965   for (i=0; i<m; i++) {
3966     row   = rstart + i;
3967     nz    = ii[i+1] - ii[i];
3968     cwork = jj;     jj += nz;
3969     vwork = aa;     aa += nz;
3970     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3971   }
3972 
3973   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3974   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975   *newmat = M;
3976 
3977   /* save submatrix used in processor for next request */
3978   if (call ==  MAT_INITIAL_MATRIX) {
3979     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3980     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3981   }
3982   PetscFunctionReturn(0);
3983 }
3984 
3985 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3986 {
3987   PetscInt       m,cstart, cend,j,nnz,i,d;
3988   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3989   const PetscInt *JJ;
3990   PetscErrorCode ierr;
3991   PetscBool      nooffprocentries;
3992 
3993   PetscFunctionBegin;
3994   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3995 
3996   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3997   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3998   m      = B->rmap->n;
3999   cstart = B->cmap->rstart;
4000   cend   = B->cmap->rend;
4001   rstart = B->rmap->rstart;
4002 
4003   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
4004 
4005   if (PetscDefined(USE_DEBUG)) {
4006     for (i=0; i<m; i++) {
4007       nnz = Ii[i+1]- Ii[i];
4008       JJ  = J + Ii[i];
4009       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
4010       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
4011       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4012     }
4013   }
4014 
4015   for (i=0; i<m; i++) {
4016     nnz     = Ii[i+1]- Ii[i];
4017     JJ      = J + Ii[i];
4018     nnz_max = PetscMax(nnz_max,nnz);
4019     d       = 0;
4020     for (j=0; j<nnz; j++) {
4021       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4022     }
4023     d_nnz[i] = d;
4024     o_nnz[i] = nnz - d;
4025   }
4026   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4027   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4028 
4029   for (i=0; i<m; i++) {
4030     ii   = i + rstart;
4031     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4032   }
4033   nooffprocentries    = B->nooffprocentries;
4034   B->nooffprocentries = PETSC_TRUE;
4035   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4036   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4037   B->nooffprocentries = nooffprocentries;
4038 
4039   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4040   PetscFunctionReturn(0);
4041 }
4042 
4043 /*@
4044    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4045    (the default parallel PETSc format).
4046 
4047    Collective
4048 
4049    Input Parameters:
4050 +  B - the matrix
4051 .  i - the indices into j for the start of each local row (starts with zero)
4052 .  j - the column indices for each local row (starts with zero)
4053 -  v - optional values in the matrix
4054 
4055    Level: developer
4056 
4057    Notes:
4058        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4059      thus you CANNOT change the matrix entries by changing the values of v[] after you have
4060      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4061 
4062        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4063 
4064        The format used for the sparse matrix input is equivalent to a
4065     row-major ordering, i.e. for the following matrix, the expected input data is
4066     as shown:
4067 
4068 $        1 0 0
4069 $        2 0 3     P0
4070 $       -------
4071 $        4 5 6     P1
4072 $
4073 $     Process0 [P0]: rows_owned=[0,1]
4074 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4075 $        j =  {0,0,2}  [size = 3]
4076 $        v =  {1,2,3}  [size = 3]
4077 $
4078 $     Process1 [P1]: rows_owned=[2]
4079 $        i =  {0,3}    [size = nrow+1  = 1+1]
4080 $        j =  {0,1,2}  [size = 3]
4081 $        v =  {4,5,6}  [size = 3]
4082 
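   A minimal usage sketch (error checking omitted; comm, m, and n are placeholders for the communicator
   and the local row/column sizes, and i, j, v are the local CSR arrays filled as in the example above):
.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
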
4083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4084           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4085 @*/
4086 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
4087 {
4088   PetscErrorCode ierr;
4089 
4090   PetscFunctionBegin;
4091   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4092   PetscFunctionReturn(0);
4093 }
4094 
4095 /*@C
4096    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4097    (the default parallel PETSc format).  For good matrix assembly performance
4098    the user should preallocate the matrix storage by setting the parameters
4099    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4100    performance can be increased by more than a factor of 50.
4101 
4102    Collective
4103 
4104    Input Parameters:
4105 +  B - the matrix
4106 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4107            (same value is used for all local rows)
4108 .  d_nnz - array containing the number of nonzeros in the various rows of the
4109            DIAGONAL portion of the local submatrix (possibly different for each row)
4110            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4111            The size of this array is equal to the number of local rows, i.e 'm'.
4112            For matrices that will be factored, you must leave room for (and set)
4113            the diagonal entry even if it is zero.
4114 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4115            submatrix (same value is used for all local rows).
4116 -  o_nnz - array containing the number of nonzeros in the various rows of the
4117            OFF-DIAGONAL portion of the local submatrix (possibly different for
4118            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4119            structure. The size of this array is equal to the number
4120            of local rows, i.e 'm'.
4121 
4122    If the *_nnz parameter is given then the *_nz parameter is ignored
4123 
4124    The AIJ format (also called the Yale sparse matrix format or
4125    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4126    storage.  The stored row and column indices begin with zero.
4127    See Users-Manual: ch_mat for details.
4128 
4129    The parallel matrix is partitioned such that the first m0 rows belong to
4130    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4131    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4132 
4133    The DIAGONAL portion of the local submatrix of a processor can be defined
4134    as the submatrix which is obtained by extracting the part corresponding to
4135    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4136    first row that belongs to the processor, r2 is the last row belonging to
4137    this processor, and c1-c2 is the range of indices of the local part of a
4138    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
4139    common case of a square matrix, the row and column ranges are the same and
4140    the DIAGONAL part is also square. The remaining portion of the local
4141    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4142 
4143    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4144 
4145    You can call MatGetInfo() to get information on how effective the preallocation was;
4146    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4147    You can also run with the option -info and look for messages with the string
4148    malloc in them to see if additional memory allocation was needed.
4149 
4150    Example usage:
4151 
4152    Consider the following 8x8 matrix with 34 non-zero values that is
4153    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4154    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4155    as follows:
4156 
4157 .vb
4158             1  2  0  |  0  3  0  |  0  4
4159     Proc0   0  5  6  |  7  0  0  |  8  0
4160             9  0 10  | 11  0  0  | 12  0
4161     -------------------------------------
4162            13  0 14  | 15 16 17  |  0  0
4163     Proc1   0 18  0  | 19 20 21  |  0  0
4164             0  0  0  | 22 23  0  | 24  0
4165     -------------------------------------
4166     Proc2  25 26 27  |  0  0 28  | 29  0
4167            30  0  0  | 31 32 33  |  0 34
4168 .ve
4169 
4170    This can be represented as a collection of submatrices as:
4171 
4172 .vb
4173       A B C
4174       D E F
4175       G H I
4176 .ve
4177 
4178    Where the submatrices A,B,C are owned by proc0, D,E,F are
4179    owned by proc1, G,H,I are owned by proc2.
4180 
4181    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4182    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4183    The 'M','N' parameters are 8,8, and have the same values on all procs.
4184 
4185    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4186    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4187    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4188    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4189    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4190    matrix, and [DF] as another SeqAIJ matrix.
4191 
4192    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4193    allocated for every row of the local DIAGONAL submatrix, and o_nz
4194    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4195    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4196    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4197    In this case, the values of d_nz,o_nz are:
4198 .vb
4199      proc0 : dnz = 2, o_nz = 2
4200      proc1 : dnz = 3, o_nz = 2
4201      proc2 : dnz = 1, o_nz = 4
4202 .ve
4203    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4204    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4205    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4206    34 values.
4207 
4208    When the d_nnz, o_nnz parameters are specified, the storage is specified
4209    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4210    In the above case the values for d_nnz,o_nnz are:
4211 .vb
4212      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4213      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4214      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4215 .ve
4216    Here the space allocated is the sum of all of the above values, i.e. 34, and
4217    hence the preallocation is perfect.
4218 
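   As a concrete sketch, proc1 in the example above could request this preallocation as follows
   (assuming B already has type MATMPIAIJ and local sizes 3 x 3 on this process):
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve
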
4219    Level: intermediate
4220 
4221 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4222           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4223 @*/
4224 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4225 {
4226   PetscErrorCode ierr;
4227 
4228   PetscFunctionBegin;
4229   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4230   PetscValidType(B,1);
4231   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4232   PetscFunctionReturn(0);
4233 }
4234 
4235 /*@
4236      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4237          CSR format.
4238 
4239    Collective
4240 
4241    Input Parameters:
4242 +  comm - MPI communicator
4243 .  m - number of local rows (Cannot be PETSC_DECIDE)
4244 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
4245        This value should be the same as the local size used in creating the
4246        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
4247 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4248 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4249 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4250 .   j - column indices
4251 -   a - matrix values
4252 
4253    Output Parameter:
4254 .   mat - the matrix
4255 
4256    Level: intermediate
4257 
4258    Notes:
4259        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4260      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4261      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4262 
4263        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4264 
4265        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4266 
4267        The format used for the sparse matrix input is equivalent to a
4268     row-major ordering, i.e. for the following matrix, the expected input data is
4269     as shown:
4270 
4271 $        1 0 0
4272 $        2 0 3     P0
4273 $       -------
4274 $        4 5 6     P1
4275 $
4276 $     Process0 [P0]: rows_owned=[0,1]
4277 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4278 $        j =  {0,0,2}  [size = 3]
4279 $        v =  {1,2,3}  [size = 3]
4280 $
4281 $     Process1 [P1]: rows_owned=[2]
4282 $        i =  {0,3}    [size = nrow+1  = 1+1]
4283 $        j =  {0,1,2}  [size = 3]
4284 $        v =  {4,5,6}  [size = 3]
4285 
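   As a sketch, process P0 in the example above could build its part of the matrix as follows
   (error checking omitted; comm is a placeholder for the shared communicator):
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
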
4286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4287           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4288 @*/
4289 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4290 {
4291   PetscErrorCode ierr;
4292 
4293   PetscFunctionBegin;
4294   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4295   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4296   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4297   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4298   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4299   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4300   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4301   PetscFunctionReturn(0);
4302 }
4303 
4304 /*@
4305      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4306          CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix.
4307 
4308    Collective
4309 
4310    Input Parameters:
4311 +  mat - the matrix
4312 .  m - number of local rows (Cannot be PETSC_DECIDE)
4313 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
4314        This value should be the same as the local size used in creating the
4315        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
4316 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4317 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4318 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4319 .  J - column indices
4320 -  v - matrix values
4321 
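   Notes:
       A usage sketch, assuming mat was created with MatCreateMPIAIJWithArrays() from the same Ii and J
     arrays and vnew (a placeholder name) holds the new values in the same CSR layout:
.vb
     MatUpdateMPIAIJWithArrays(mat,m,n,M,N,Ii,J,vnew);
.ve
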
4322    Level: intermediate
4323 
4324 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4325           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4326 @*/
4327 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4328 {
4329   PetscErrorCode ierr;
4330   PetscInt       cstart,nnz,i,j;
4331   PetscInt       *ld;
4332   PetscBool      nooffprocentries;
4333   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4334   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4335   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4336   const PetscInt *Adi = Ad->i;
4337   PetscInt       ldi,Iii,md;
4338 
4339   PetscFunctionBegin;
4340   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4341   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4342   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4343   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4344 
4345   cstart = mat->cmap->rstart;
4346   if (!Aij->ld) {
4347     /* count number of entries below block diagonal */
4348     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4349     Aij->ld = ld;
4350     for (i=0; i<m; i++) {
4351       nnz  = Ii[i+1]- Ii[i];
4352       j     = 0;
4353       while (j < nnz && J[j] < cstart) {j++;} /* check j < nnz first so we never read past the end of this row */
4354       J    += nnz;
4355       ld[i] = j;
4356     }
4357   } else {
4358     ld = Aij->ld;
4359   }
4360 
4361   for (i=0; i<m; i++) {
4362     nnz  = Ii[i+1]- Ii[i];
4363     Iii  = Ii[i];
4364     ldi  = ld[i];
4365     md   = Adi[i+1]-Adi[i];
4366     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4367     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4368     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4369     ad  += md;
4370     ao  += nnz - md;
4371   }
4372   nooffprocentries      = mat->nooffprocentries;
4373   mat->nooffprocentries = PETSC_TRUE;
4374   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4375   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4376   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4377   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4379   mat->nooffprocentries = nooffprocentries;
4380   PetscFunctionReturn(0);
4381 }
4382 
4383 /*@C
4384    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4385    (the default parallel PETSc format).  For good matrix assembly performance
4386    the user should preallocate the matrix storage by setting the parameters
4387    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4388    performance can be increased by more than a factor of 50.
4389 
4390    Collective
4391 
4392    Input Parameters:
4393 +  comm - MPI communicator
4394 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4395            This value should be the same as the local size used in creating the
4396            y vector for the matrix-vector product y = Ax.
4397 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
4398        This value should be the same as the local size used in creating the
4399        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
4400 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4401 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4402 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4403            (same value is used for all local rows)
4404 .  d_nnz - array containing the number of nonzeros in the various rows of the
4405            DIAGONAL portion of the local submatrix (possibly different for each row)
4406            or NULL, if d_nz is used to specify the nonzero structure.
4407            The size of this array is equal to the number of local rows, i.e 'm'.
4408 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4409            submatrix (same value is used for all local rows).
4410 -  o_nnz - array containing the number of nonzeros in the various rows of the
4411            OFF-DIAGONAL portion of the local submatrix (possibly different for
4412            each row) or NULL, if o_nz is used to specify the nonzero
4413            structure. The size of this array is equal to the number
4414            of local rows, i.e 'm'.
4415 
4416    Output Parameter:
4417 .  A - the matrix
4418 
4419    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4420    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4421    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4422 
4423    Notes:
4424    If the *_nnz parameter is given then the *_nz parameter is ignored
4425 
4426    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4427    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4428    storage requirements for this matrix.
4429 
4430    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4431    processor then it must be used on all processors that share the object for
4432    that argument.
4433 
4434    The user MUST specify either the local or global matrix dimensions
4435    (possibly both).
4436 
4437    The parallel matrix is partitioned across processors such that the
4438    first m0 rows belong to process 0, the next m1 rows belong to
4439    process 1, the next m2 rows belong to process 2, etc., where
4440    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4441    values corresponding to an [m x N] submatrix.
4442 
4443    The columns are logically partitioned with the n0 columns belonging
4444    to the 0th partition, the next n1 columns belonging to the next
4445    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4446 
4447    The DIAGONAL portion of the local submatrix on any given processor
4448    is the submatrix corresponding to the rows and columns m,n
4449    owned by the given processor, i.e. the diagonal matrix on
4450    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4451    etc. The remaining portion of the local submatrix [m x (N-n)]
4452    constitutes the OFF-DIAGONAL portion. The example below better
4453    illustrates this concept.
4454 
4455    For a square global matrix we define each processor's diagonal portion
4456    to be its local rows and the corresponding columns (a square submatrix);
4457    each processor's off-diagonal portion encompasses the remainder of the
4458    local matrix (a rectangular submatrix).
4459 
4460    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4461 
4462    When calling this routine with a single process communicator, a matrix of
4463    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4464    type of communicator, use the construction mechanism
4465 .vb
4466      MatCreate(...,&A);
4467      MatSetType(A,MATMPIAIJ);
4468      MatSetSizes(A, m,n,M,N);
4469      MatMPIAIJSetPreallocation(A,...);
4470 .ve
4473 
4474    By default, this format uses inodes (identical nodes) when possible.
4475    We search for consecutive rows with the same nonzero structure, thereby
4476    reusing matrix information to achieve increased efficiency.
4477 
4478    Options Database Keys:
4479 +  -mat_no_inode  - Do not use inodes
4480 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4481 
4484    Example usage:
4485 
4486    Consider the following 8x8 matrix with 34 non-zero values that is
4487    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4488    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4489    as follows:
4490 
4491 .vb
4492             1  2  0  |  0  3  0  |  0  4
4493     Proc0   0  5  6  |  7  0  0  |  8  0
4494             9  0 10  | 11  0  0  | 12  0
4495     -------------------------------------
4496            13  0 14  | 15 16 17  |  0  0
4497     Proc1   0 18  0  | 19 20 21  |  0  0
4498             0  0  0  | 22 23  0  | 24  0
4499     -------------------------------------
4500     Proc2  25 26 27  |  0  0 28  | 29  0
4501            30  0  0  | 31 32 33  |  0 34
4502 .ve
4503 
4504    This can be represented as a collection of submatrices as
4505 
4506 .vb
4507       A B C
4508       D E F
4509       G H I
4510 .ve
4511 
4512    Where the submatrices A,B,C are owned by proc0, D,E,F are
4513    owned by proc1, G,H,I are owned by proc2.
4514 
4515    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4516    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4517    The 'M','N' parameters are 8,8, and have the same values on all procs.
4518 
4519    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4520    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4521    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4522    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4523    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4524    matrix, and [DF] as another SeqAIJ matrix.
4525 
4526    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4527    allocated for every row of the local DIAGONAL submatrix, and o_nz
4528    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4529    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4530    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4531    In this case, the values of d_nz,o_nz are:
4532 .vb
4533      proc0 : dnz = 2, o_nz = 2
4534      proc1 : dnz = 3, o_nz = 2
4535      proc2 : dnz = 1, o_nz = 4
4536 .ve
4537    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4538    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4539    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4540    34 values.
4541 
4542    When the d_nnz, o_nnz parameters are specified, the storage is specified
4543    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4544    In the above case the values for d_nnz,o_nnz are
4545 .vb
4546      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4547      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4548      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4549 .ve
4550    Here the space allocated is the sum of all of the above values, i.e. 34, and
4551    hence the preallocation is perfect.
4552 
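   As a sketch, proc0 in the example above could create its share of the 8x8 matrix as follows
   (error checking omitted; comm is a placeholder for the shared communicator):
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
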
4553    Level: intermediate
4554 
4555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4556           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4557 @*/
4558 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4559 {
4560   PetscErrorCode ierr;
4561   PetscMPIInt    size;
4562 
4563   PetscFunctionBegin;
4564   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4565   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4566   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4567   if (size > 1) {
4568     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4569     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4570   } else {
4571     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4572     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4573   }
4574   PetscFunctionReturn(0);
4575 }
4576 
4577 /*@C
4578   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4579 
4580   Not collective
4581 
4582   Input Parameter:
4583 . A - The MPIAIJ matrix
4584 
4585   Output Parameters:
4586 + Ad - The local diagonal block as a SeqAIJ matrix
4587 . Ao - The local off-diagonal block as a SeqAIJ matrix
4588 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4589 
4590   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4591   in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4592   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4593   local column numbers to global column numbers in the original matrix.
4594 
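  A sketch of mapping a local column index lc of Ao (lc is a placeholder in [0, Nco)) back to the
  corresponding global column of A:
.vb
    Mat            Ad,Ao;
    const PetscInt *colmap;
    PetscInt       gcol;
    MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
    gcol = colmap[lc];  /* global column number in the parallel matrix A */
.ve
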
4595   Level: intermediate
4596 
4597 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4598 @*/
4599 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4600 {
4601   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4602   PetscBool      flg;
4603   PetscErrorCode ierr;
4604 
4605   PetscFunctionBegin;
4606   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4607   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4608   if (Ad)     *Ad     = a->A;
4609   if (Ao)     *Ao     = a->B;
4610   if (colmap) *colmap = a->garray;
4611   PetscFunctionReturn(0);
4612 }
4613 
4614 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4615 {
4616   PetscErrorCode ierr;
4617   PetscInt       m,N,i,rstart,nnz,Ii;
4618   PetscInt       *indx;
4619   PetscScalar    *values;
4620 
4621   PetscFunctionBegin;
4622   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4623   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4624     PetscInt       *dnz,*onz,sum,bs,cbs;
4625 
4626     if (n == PETSC_DECIDE) {
4627       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4628     }
4629     /* Check sum(n) = N */
4630     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4631     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4632 
4633     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4634     rstart -= m;
4635 
4636     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4637     for (i=0; i<m; i++) {
4638       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4639       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4640       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4641     }
4642 
4643     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4644     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4645     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4646     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4647     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4648     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4649     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4650     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4651   }
4652 
4653   /* numeric phase */
4654   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4655   for (i=0; i<m; i++) {
4656     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4657     Ii   = i + rstart;
4658     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4659     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4660   }
4661   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4662   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4663   PetscFunctionReturn(0);
4664 }
4665 
4666 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4667 {
4668   PetscErrorCode    ierr;
4669   PetscMPIInt       rank;
4670   PetscInt          m,N,i,rstart,nnz;
4671   size_t            len;
4672   const PetscInt    *indx;
4673   PetscViewer       out;
4674   char              *name;
4675   Mat               B;
4676   const PetscScalar *values;
4677 
4678   PetscFunctionBegin;
4679   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4680   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4681   /* Should this be the type of the diagonal block of A? */
4682   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4683   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4684   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4685   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4686   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4687   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4688   for (i=0; i<m; i++) {
4689     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4690     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4691     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4692   }
4693   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4694   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4695 
4696   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4697   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4698   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4699   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4700   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4701   ierr = PetscFree(name);CHKERRQ(ierr);
4702   ierr = MatView(B,out);CHKERRQ(ierr);
4703   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4704   ierr = MatDestroy(&B);CHKERRQ(ierr);
4705   PetscFunctionReturn(0);
4706 }
4707 
4708 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4709 {
4710   PetscErrorCode      ierr;
4711   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4712 
4713   PetscFunctionBegin;
4714   if (!merge) PetscFunctionReturn(0);
4715   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4716   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4717   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4718   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4719   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4720   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4721   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4722   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4723   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4724   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4725   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4726   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4727   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4728   ierr = PetscFree(merge);CHKERRQ(ierr);
4729   PetscFunctionReturn(0);
4730 }
4731 
4732 #include <../src/mat/utils/freespace.h>
4733 #include <petscbt.h>
4734 
4735 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4736 {
4737   PetscErrorCode      ierr;
4738   MPI_Comm            comm;
4739   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4740   PetscMPIInt         size,rank,taga,*len_s;
4741   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4742   PetscInt            proc,m;
4743   PetscInt            **buf_ri,**buf_rj;
4744   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4745   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4746   MPI_Request         *s_waits,*r_waits;
4747   MPI_Status          *status;
4748   MatScalar           *aa=a->a;
4749   MatScalar           **abuf_r,*ba_i;
4750   Mat_Merge_SeqsToMPI *merge;
4751   PetscContainer      container;
4752 
4753   PetscFunctionBegin;
4754   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4755   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4756 
4757   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4758   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4759 
4760   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4761   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4762   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4763 
4764   bi     = merge->bi;
4765   bj     = merge->bj;
4766   buf_ri = merge->buf_ri;
4767   buf_rj = merge->buf_rj;
4768 
4769   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4770   owners = merge->rowmap->range;
4771   len_s  = merge->len_s;
4772 
4773   /* send and recv matrix values */
4774   /*-----------------------------*/
4775   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4776   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4777 
4778   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4779   for (proc=0,k=0; proc<size; proc++) {
4780     if (!len_s[proc]) continue;
4781     i    = owners[proc];
4782     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4783     k++;
4784   }
4785 
4786   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4787   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4788   ierr = PetscFree(status);CHKERRQ(ierr);
4789 
4790   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4791   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4792 
4793   /* insert mat values of mpimat */
4794   /*----------------------------*/
4795   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4796   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4797 
4798   for (k=0; k<merge->nrecv; k++) {
4799     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4800     nrows       = *(buf_ri_k[k]);
4801     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4802     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4803   }
4804 
4805   /* set values of ba */
4806   m = merge->rowmap->n;
4807   for (i=0; i<m; i++) {
4808     arow = owners[rank] + i;
4809     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4810     bnzi = bi[i+1] - bi[i];
4811     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4812 
4813     /* add local non-zero vals of this proc's seqmat into ba */
4814     anzi   = ai[arow+1] - ai[arow];
4815     aj     = a->j + ai[arow];
4816     aa     = a->a + ai[arow];
4817     nextaj = 0;
4818     for (j=0; nextaj<anzi; j++) {
4819       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4820         ba_i[j] += aa[nextaj++];
4821       }
4822     }
4823 
4824     /* add received vals into ba */
4825     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4826       /* i-th row */
4827       if (i == *nextrow[k]) {
4828         anzi   = *(nextai[k]+1) - *nextai[k];
4829         aj     = buf_rj[k] + *(nextai[k]);
4830         aa     = abuf_r[k] + *(nextai[k]);
4831         nextaj = 0;
4832         for (j=0; nextaj<anzi; j++) {
4833           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4834             ba_i[j] += aa[nextaj++];
4835           }
4836         }
4837         nextrow[k]++; nextai[k]++;
4838       }
4839     }
4840     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4841   }
4842   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4843   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4844 
4845   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4846   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4847   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4848   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4849   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4850   PetscFunctionReturn(0);
4851 }
4852 
4853 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4854 {
4855   PetscErrorCode      ierr;
4856   Mat                 B_mpi;
4857   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4858   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4859   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4860   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4861   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4862   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4863   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4864   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4865   MPI_Status          *status;
4866   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4867   PetscBT             lnkbt;
4868   Mat_Merge_SeqsToMPI *merge;
4869   PetscContainer      container;
4870 
4871   PetscFunctionBegin;
4872   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4873 
4874   /* make sure it is a PETSc comm */
4875   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4876   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4877   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4878 
4879   ierr = PetscNew(&merge);CHKERRQ(ierr);
4880   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4881 
4882   /* determine row ownership */
4883   /*---------------------------------------------------------*/
4884   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4885   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4886   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4887   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4888   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4889   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4890   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4891 
4892   m      = merge->rowmap->n;
4893   owners = merge->rowmap->range;
4894 
4895   /* determine the number of messages to send, their lengths */
4896   /*---------------------------------------------------------*/
4897   len_s = merge->len_s;
4898 
4899   len          = 0; /* length of buf_si[] */
4900   merge->nsend = 0;
4901   for (proc=0; proc<size; proc++) {
4902     len_si[proc] = 0;
4903     if (proc == rank) {
4904       len_s[proc] = 0;
4905     } else {
4906       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4907       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4908     }
4909     if (len_s[proc]) {
4910       merge->nsend++;
4911       nrows = 0;
4912       for (i=owners[proc]; i<owners[proc+1]; i++) {
4913         if (ai[i+1] > ai[i]) nrows++;
4914       }
4915       len_si[proc] = 2*(nrows+1);
4916       len         += len_si[proc];
4917     }
4918   }
4919 
4920   /* determine the number and length of messages to receive for ij-structure */
4921   /*-------------------------------------------------------------------------*/
4922   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4923   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4924 
4925   /* post the Irecv of j-structure */
4926   /*-------------------------------*/
4927   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4928   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4929 
4930   /* post the Isend of j-structure */
4931   /*--------------------------------*/
4932   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4933 
4934   for (proc=0, k=0; proc<size; proc++) {
4935     if (!len_s[proc]) continue;
4936     i    = owners[proc];
4937     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4938     k++;
4939   }
4940 
4941   /* receives and sends of j-structure are complete */
4942   /*------------------------------------------------*/
4943   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4944   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4945 
4946   /* send and recv i-structure */
4947   /*---------------------------*/
4948   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4949   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4950 
4951   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4952   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4953   for (proc=0,k=0; proc<size; proc++) {
4954     if (!len_s[proc]) continue;
4955     /* form outgoing message for i-structure:
4956          buf_si[0]:                 nrows to be sent
4957                [1:nrows]:           row index (global)
4958                [nrows+1:2*nrows+1]: i-structure index
4959     */
4960     /*-------------------------------------------*/
4961     nrows       = len_si[proc]/2 - 1;
4962     buf_si_i    = buf_si + nrows+1;
4963     buf_si[0]   = nrows;
4964     buf_si_i[0] = 0;
4965     nrows       = 0;
4966     for (i=owners[proc]; i<owners[proc+1]; i++) {
4967       anzi = ai[i+1] - ai[i];
4968       if (anzi) {
4969         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4970         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4971         nrows++;
4972       }
4973     }
4974     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4975     k++;
4976     buf_si += len_si[proc];
4977   }
4978 
4979   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4980   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4981 
4982   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4983   for (i=0; i<merge->nrecv; i++) {
4984     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4985   }
4986 
4987   ierr = PetscFree(len_si);CHKERRQ(ierr);
4988   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4989   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4990   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4991   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4992   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4993   ierr = PetscFree(status);CHKERRQ(ierr);
4994 
4995   /* compute a local seq matrix in each processor */
4996   /*----------------------------------------------*/
4997   /* allocate bi array and free space for accumulating nonzero column info */
4998   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4999   bi[0] = 0;
5000 
5001   /* create and initialize a linked list */
5002   nlnk = N+1;
5003   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5004 
5005   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5006   len  = ai[owners[rank+1]] - ai[owners[rank]];
5007   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
5008 
5009   current_space = free_space;
5010 
5011   /* determine symbolic info for each local row */
5012   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
5013 
5014   for (k=0; k<merge->nrecv; k++) {
5015     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5016     nrows       = *buf_ri_k[k];
5017     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5018     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure offsets of the k-th received message */
5019   }
5020 
5021   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5022   len  = 0;
5023   for (i=0; i<m; i++) {
5024     bnzi = 0;
5025     /* add local non-zero cols of this proc's seqmat into lnk */
5026     arow  = owners[rank] + i;
5027     anzi  = ai[arow+1] - ai[arow];
5028     aj    = a->j + ai[arow];
5029     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5030     bnzi += nlnk;
5031     /* add received col data into lnk */
5032     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5033       if (i == *nextrow[k]) { /* i-th row */
5034         anzi  = *(nextai[k]+1) - *nextai[k];
5035         aj    = buf_rj[k] + *nextai[k];
5036         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5037         bnzi += nlnk;
5038         nextrow[k]++; nextai[k]++;
5039       }
5040     }
5041     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5042 
5043     /* if free space is not available, make more free space */
5044     if (current_space->local_remaining<bnzi) {
5045       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
5046       nspacedouble++;
5047     }
5048     /* copy data into free space, then initialize lnk */
5049     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5050     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5051 
5052     current_space->array           += bnzi;
5053     current_space->local_used      += bnzi;
5054     current_space->local_remaining -= bnzi;
5055 
5056     bi[i+1] = bi[i] + bnzi;
5057   }
5058 
5059   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5060 
5061   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5062   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5063   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5064 
5065   /* create symbolic parallel matrix B_mpi */
5066   /*---------------------------------------*/
5067   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5068   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5069   if (n==PETSC_DECIDE) {
5070     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5071   } else {
5072     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5073   }
5074   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5075   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5076   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5077   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5078   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5079 
5080   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5081   B_mpi->assembled  = PETSC_FALSE;
5082   merge->bi         = bi;
5083   merge->bj         = bj;
5084   merge->buf_ri     = buf_ri;
5085   merge->buf_rj     = buf_rj;
5086   merge->coi        = NULL;
5087   merge->coj        = NULL;
5088   merge->owners_co  = NULL;
5089 
5090   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5091 
5092   /* attach the supporting struct to B_mpi for reuse */
5093   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5094   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5095   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5096   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5097   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5098   *mpimat = B_mpi;
5099 
5100   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5101   PetscFunctionReturn(0);
5102 }
5103 
5104 /*@C
5105       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5106                  matrices from each processor
5107 
5108     Collective
5109 
5110    Input Parameters:
5111 +    comm - the communicator the parallel matrix will live on
5112 .    seqmat - the input sequential matrix (one per process)
5113 .    m - number of local rows (or PETSC_DECIDE)
5114 .    n - number of local columns (or PETSC_DECIDE)
5115 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5116 
5117    Output Parameter:
5118 .    mpimat - the parallel matrix generated
5119 
5120     Level: advanced
5121 
5122    Notes:
5123      The dimensions of the sequential matrix in each processor MUST be the same.
5124      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5125      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5126 @*/
5127 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5128 {
5129   PetscErrorCode ierr;
5130   PetscMPIInt    size;
5131 
5132   PetscFunctionBegin;
5133   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5134   if (size == 1) {
5135     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5136     if (scall == MAT_INITIAL_MATRIX) {
5137       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5138     } else {
5139       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5140     }
5141     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5142     PetscFunctionReturn(0);
5143   }
5144   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5145   if (scall == MAT_INITIAL_MATRIX) {
5146     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5147   }
5148   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5149   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5150   PetscFunctionReturn(0);
5151 }
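/*
   Usage sketch (not part of PETSc; kept under "#if 0", with made-up sizes and a hypothetical
   driver name, purely for illustration): every process assembles its own SeqAIJ contribution
   of the full global size and MatCreateMPIAIJSumSeqAIJ() sums the contributions into one
   parallel MATMPIAIJ matrix, first with MAT_INITIAL_MATRIX and then reusing the symbolic data.
*/
#if 0
static PetscErrorCode ExampleSumSeqAIJ(MPI_Comm comm)
{
  Mat            seqmat,mpimat;
  PetscInt       i,M = 4,N = 4;   /* illustrative sizes: every rank contributes a 4x4 SeqAIJ */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,1,NULL,&seqmat);CHKERRQ(ierr);
  for (i=0; i<M; i++) {ierr = MatSetValue(seqmat,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);}
  ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* first call performs the symbolic and numeric phases; PETSC_DECIDE picks the row/column split */
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
  /* after changing the values of seqmat (same nonzero pattern) the symbolic data is reused */
  ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);

  /* per the Notes above, seqmat is kept in the attached container and cleaned up with mpimat */
  ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif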
5152 
5153 /*@
5154      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5155           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5156           with MatGetSize()
5157 
5158     Not Collective
5159 
5160    Input Parameters:
5161 +    A - the matrix
5162 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5163 
5164    Output Parameter:
5165 .    A_loc - the local sequential matrix generated
5166 
5167     Level: developer
5168 
5169    Notes:
5170      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5171      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5172      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5173      modify the values of the returned A_loc.
5174 
5175 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5176 
5177 @*/
5178 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5179 {
5180   PetscErrorCode ierr;
5181   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5182   Mat_SeqAIJ     *mat,*a,*b;
5183   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5184   MatScalar      *aa,*ba,*cam;
5185   PetscScalar    *ca;
5186   PetscMPIInt    size;
5187   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5188   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5189   PetscBool      match;
5190 
5191   PetscFunctionBegin;
5192   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5193   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5194   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5195   if (size == 1) {
5196     if (scall == MAT_INITIAL_MATRIX) {
5197       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5198       *A_loc = mpimat->A;
5199     } else if (scall == MAT_REUSE_MATRIX) {
5200       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5201     }
5202     PetscFunctionReturn(0);
5203   }
5204 
5205   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5206   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5207   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5208   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5209   aa = a->a; ba = b->a;
5210   if (scall == MAT_INITIAL_MATRIX) {
5211     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5212     ci[0] = 0;
5213     for (i=0; i<am; i++) {
5214       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5215     }
5216     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5217     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5218     k    = 0;
5219     for (i=0; i<am; i++) {
5220       ncols_o = bi[i+1] - bi[i];
5221       ncols_d = ai[i+1] - ai[i];
5222       /* off-diagonal portion of A */
5223       for (jo=0; jo<ncols_o; jo++) {
5224         col = cmap[*bj];
5225         if (col >= cstart) break;
5226         cj[k]   = col; bj++;
5227         ca[k++] = *ba++;
5228       }
5229       /* diagonal portion of A */
5230       for (j=0; j<ncols_d; j++) {
5231         cj[k]   = cstart + *aj++;
5232         ca[k++] = *aa++;
5233       }
5234       /* off-diagonal portion of A */
5235       for (j=jo; j<ncols_o; j++) {
5236         cj[k]   = cmap[*bj++];
5237         ca[k++] = *ba++;
5238       }
5239     }
5240     /* put together the new matrix */
5241     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5242     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5243     /* Since these are PETSc arrays, change flags to free them as necessary. */
5244     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5245     mat->free_a  = PETSC_TRUE;
5246     mat->free_ij = PETSC_TRUE;
5247     mat->nonew   = 0;
5248   } else if (scall == MAT_REUSE_MATRIX) {
5249     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5250     ci = mat->i; cj = mat->j; cam = mat->a;
5251     for (i=0; i<am; i++) {
5252       /* off-diagonal portion of A */
5253       ncols_o = bi[i+1] - bi[i];
5254       for (jo=0; jo<ncols_o; jo++) {
5255         col = cmap[*bj];
5256         if (col >= cstart) break;
5257         *cam++ = *ba++; bj++;
5258       }
5259       /* diagonal portion of A */
5260       ncols_d = ai[i+1] - ai[i];
5261       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5262       /* off-diagonal portion of A */
5263       for (j=jo; j<ncols_o; j++) {
5264         *cam++ = *ba++; bj++;
5265       }
5266     }
5267   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5268   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5269   PetscFunctionReturn(0);
5270 }
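/*
   Usage sketch (hypothetical helper, kept under "#if 0" for illustration only): extract the local
   rows of a parallel MATMPIAIJ matrix A into a sequential matrix, then refresh it in place after
   the values of A change.
*/
#if 0
static PetscErrorCode ExampleGetLocalMat(Mat A)
{
  Mat            A_loc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* builds a SeqAIJ with the local rows of A and all global columns */
  ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
  /* ... use A_loc ... */
  /* same nonzero pattern: copy the updated values of A into the existing A_loc */
  ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
  ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif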
5271 
5272 /*@C
5273      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5274 
5275     Not Collective
5276 
5277    Input Parameters:
5278 +    A - the matrix
5279 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5280 -    row, col - index sets of rows and columns to extract (or NULL)
5281 
5282    Output Parameter:
5283 .    A_loc - the local sequential matrix generated
5284 
5285     Level: developer
5286 
5287 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5288 
5289 @*/
5290 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5291 {
5292   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5293   PetscErrorCode ierr;
5294   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5295   IS             isrowa,iscola;
5296   Mat            *aloc;
5297   PetscBool      match;
5298 
5299   PetscFunctionBegin;
5300   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5301   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5302   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5303   if (!row) {
5304     start = A->rmap->rstart; end = A->rmap->rend;
5305     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5306   } else {
5307     isrowa = *row;
5308   }
5309   if (!col) {
5310     start = A->cmap->rstart;
5311     cmap  = a->garray;
5312     nzA   = a->A->cmap->n;
5313     nzB   = a->B->cmap->n;
5314     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5315     ncols = 0;
5316     for (i=0; i<nzB; i++) {
5317       if (cmap[i] < start) idx[ncols++] = cmap[i];
5318       else break;
5319     }
5320     imark = i;
5321     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5322     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5323     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5324   } else {
5325     iscola = *col;
5326   }
5327   if (scall != MAT_INITIAL_MATRIX) {
5328     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5329     aloc[0] = *A_loc;
5330   }
5331   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5332   if (!col) { /* attach global id of condensed columns */
5333     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5334   }
5335   *A_loc = aloc[0];
5336   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5337   if (!row) {
5338     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5339   }
5340   if (!col) {
5341     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5342   }
5343   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5344   PetscFunctionReturn(0);
5345 }
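/*
   Usage sketch (hypothetical helper, kept under "#if 0" for illustration only): passing NULL for
   the row and column index sets takes all local rows and only the columns that contain nonzeros.
*/
#if 0
static PetscErrorCode ExampleGetLocalMatCondensed(Mat A)
{
  Mat            A_loc;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
  /* ... use A_loc; the condensed global column numbering is attached as
     "_petsc_GetLocalMatCondensed_iscol" and can be retrieved with PetscObjectQuery() ... */
  ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_REUSE_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
  ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif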
5346 
5347 /*
5348  * Create a sequential AIJ matrix based on row indices: once a row index is matched, the entire row (all of its columns) is extracted.
5349  * Rows can be local or remote. The routine is designed to be scalable in memory, so that nothing it builds is based
5350  * on a global size.
5351  * */
5352 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5353 {
5354   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5355   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5356   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5357   PetscMPIInt              owner;
5358   PetscSFNode              *iremote,*oiremote;
5359   const PetscInt           *lrowindices;
5360   PetscErrorCode           ierr;
5361   PetscSF                  sf,osf;
5362   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5363   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5364   MPI_Comm                 comm;
5365   ISLocalToGlobalMapping   mapping;
5366 
5367   PetscFunctionBegin;
5368   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5369   /* plocalsize is the number of roots
5370    * nrows is the number of leaves
5371    * */
5372   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5373   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5374   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5375   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5376   for (i=0;i<nrows;i++) {
5377     /* Find a remote index and an owner for a row
5378      * The row could be local or remote
5379      * */
5380     owner = 0;
5381     lidx  = 0;
5382     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5383     iremote[i].index = lidx;
5384     iremote[i].rank  = owner;
5385   }
5386   /* Create SF to communicate how many nonzero columns for each row */
5387   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5388   /* SF will figure out the number of nonzero columns for each row, and their
5389    * offsets
5390    * */
5391   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5392   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5393   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5394 
5395   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5396   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5397   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5398   roffsets[0] = 0;
5399   roffsets[1] = 0;
5400   for (i=0;i<plocalsize;i++) {
5401     /* diag */
5402     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5403     /* off diag */
5404     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5405     /* compute offsets so that we know the relative location of each row */
5406     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5407     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5408   }
5409   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5410   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5411   /* 'r' means root, and 'l' means leaf */
5412   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5413   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5414   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5415   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5416   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5417   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5418   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5419   dntotalcols = 0;
5420   ontotalcols = 0;
5421   ncol = 0;
5422   for (i=0;i<nrows;i++) {
5423     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5424     ncol = PetscMax(pnnz[i],ncol);
5425     /* diag */
5426     dntotalcols += nlcols[i*2+0];
5427     /* off diag */
5428     ontotalcols += nlcols[i*2+1];
5429   }
5430   /* We do not need to figure out the exact number of columns
5431    * since all the calculations will be done by going through the raw data
5432    * */
5433   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5434   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5435   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5436   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5437   /* diag */
5438   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5439   /* off diag */
5440   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5441   /* diag */
5442   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5443   /* off diag */
5444   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5445   dntotalcols = 0;
5446   ontotalcols = 0;
5447   ntotalcols  = 0;
5448   for (i=0;i<nrows;i++) {
5449     owner = 0;
5450     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5451     /* Set iremote for diag matrix */
5452     for (j=0;j<nlcols[i*2+0];j++) {
5453       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5454       iremote[dntotalcols].rank    = owner;
5455       /* P_oth is SeqAIJ, so ilocal needs to point into the first part of its memory */
5456       ilocal[dntotalcols++]        = ntotalcols++;
5457     }
5458     /* off diag */
5459     for (j=0;j<nlcols[i*2+1];j++) {
5460       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5461       oiremote[ontotalcols].rank    = owner;
5462       oilocal[ontotalcols++]        = ntotalcols++;
5463     }
5464   }
5465   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5466   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5467   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5468   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5469   /* P serves as roots and P_oth is leaves
5470    * Diag matrix
5471    * */
5472   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5473   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5474   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5475 
5476   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5477   /* Off diag */
5478   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5479   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5480   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5481   /* We operate on the matrix internal data for saving memory */
5482   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5483   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5484   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5485   /* Convert to global indices for diag matrix */
5486   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5487   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5488   /* We want P_oth to store global indices */
5489   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5490   /* Use memory scalable approach */
5491   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5492   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5493   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5494   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5495   /* Convert back to local indices */
5496   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5497   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5498   nout = 0;
5499   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5500   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D \n",po->i[plocalsize],nout);
5501   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5502   /* Exchange values */
5503   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5504   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5505   /* Stop PETSc from shrinking memory */
5506   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5507   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5508   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5509   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5510   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5511   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5512   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5513   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5514   PetscFunctionReturn(0);
5515 }
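/*
   Sketch of the star-forest (PetscSF) root/leaf broadcast pattern used above (hypothetical,
   self-contained example with made-up sizes; not part of PETSc): every rank owns "nroots"
   root entries, each rank defines one leaf that pulls root entry 0 of rank 0, and the same
   PetscSFBcastBegin/End pairing as for nrcols/roffsets above moves the data.
*/
#if 0
static PetscErrorCode ExampleSFBcast(MPI_Comm comm)
{
  PetscSF        sf;
  PetscSFNode    *iremote;
  PetscInt       nroots = 2,nleaves = 1,rootdata[2],leafdata[1];
  PetscMPIInt    rank;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  rootdata[0] = 100 + rank; rootdata[1] = 200 + rank;
  ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
  iremote[0].rank  = 0;  /* every leaf points at root entry 0 owned by rank 0 */
  iremote[0].index = 0;
  ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
  /* leafdata[0] now holds rank 0's rootdata[0] on every process */
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif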
5516 
5517 /*
5518  * Creates a SeqAIJ matrix by taking the rows of B whose indices equal the nonzero column indices of local A
5519  * This supports MPIAIJ and MAIJ
5520  * */
5521 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5522 {
5523   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5524   Mat_SeqAIJ            *p_oth;
5525   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5526   IS                    rows,map;
5527   PetscHMapI            hamp;
5528   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5529   MPI_Comm              comm;
5530   PetscSF               sf,osf;
5531   PetscBool             has;
5532   PetscErrorCode        ierr;
5533 
5534   PetscFunctionBegin;
5535   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5536   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5537   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5538    *  and then create a submatrix (that often is an overlapping matrix)
5539    * */
5540   if (reuse == MAT_INITIAL_MATRIX) {
5541     /* Use a hash table to figure out unique keys */
5542     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5543     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5544     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5545     count = 0;
5546     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5547     for (i=0;i<a->B->cmap->n;i++) {
5548       key  = a->garray[i]/dof;
5549       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5550       if (!has) {
5551         mapping[i] = count;
5552         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5553       } else {
5554         /* Current 'i' maps to the same key as the previous step */
5555         mapping[i] = count-1;
5556       }
5557     }
5558     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5559     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5560     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);
5561     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5562     off = 0;
5563     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5564     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5565     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5566     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5567     /* In case the matrix was already created but the user wants to recreate it */
5568     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5569     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5570     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5571     ierr = ISDestroy(&map);CHKERRQ(ierr);
5572     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5573   } else if (reuse == MAT_REUSE_MATRIX) {
5574     /* If the matrix was already created, we simply update the values using the SF objects
5575      * that were attached to the matrix earlier.
5576      *  */
5577     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5578     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5579     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5580     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5581     /* Update values in place */
5582     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5583     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5584     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5585     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5586   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5587   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5588   PetscFunctionReturn(0);
5589 }
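/*
   Sketch of the PetscHMapI deduplication idiom used above (hypothetical, illustrative input;
   not part of PETSc): unseen keys are inserted with a running counter as value, then the unique
   keys are extracted and sorted, mirroring how a->garray/dof is condensed into "rows".
*/
#if 0
static PetscErrorCode ExampleHashDedup(void)
{
  PetscHMapI     hamp;
  PetscInt       input[6] = {3,3,7,7,7,9},i,htsize,off = 0,count = 0,*keys;
  PetscBool      has;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
  for (i=0; i<6; i++) {
    ierr = PetscHMapIHas(hamp,input[i],&has);CHKERRQ(ierr);
    if (!has) {ierr = PetscHMapISet(hamp,input[i],count++);CHKERRQ(ierr);}
  }
  ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);    /* 3 unique keys */
  ierr = PetscCalloc1(htsize,&keys);CHKERRQ(ierr);
  ierr = PetscHMapIGetKeys(hamp,&off,keys);CHKERRQ(ierr);  /* unordered, hence the sort */
  ierr = PetscSortInt(htsize,keys);CHKERRQ(ierr);
  ierr = PetscFree(keys);CHKERRQ(ierr);
  ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif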
5590 
5591 /*@C
5592     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B whose indices equal the nonzero column indices of local A
5593 
5594     Collective on Mat
5595 
5596    Input Parameters:
5597 +    A,B - the matrices in mpiaij format
5598 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5599 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5600 
5601    Output Parameter:
5602 +    rowb, colb - index sets of rows and columns of B to extract
5603 -    B_seq - the sequential matrix generated
5604 
5605     Level: developer
5606 
5607 @*/
5608 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5609 {
5610   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5611   PetscErrorCode ierr;
5612   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5613   IS             isrowb,iscolb;
5614   Mat            *bseq=NULL;
5615 
5616   PetscFunctionBegin;
5617   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5618     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5619   }
5620   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5621 
5622   if (scall == MAT_INITIAL_MATRIX) {
5623     start = A->cmap->rstart;
5624     cmap  = a->garray;
5625     nzA   = a->A->cmap->n;
5626     nzB   = a->B->cmap->n;
5627     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5628     ncols = 0;
5629     for (i=0; i<nzB; i++) {  /* row < local row index */
5630       if (cmap[i] < start) idx[ncols++] = cmap[i];
5631       else break;
5632     }
5633     imark = i;
5634     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5635     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5636     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5637     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5638   } else {
5639     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5640     isrowb  = *rowb; iscolb = *colb;
5641     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5642     bseq[0] = *B_seq;
5643   }
5644   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5645   *B_seq = bseq[0];
5646   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5647   if (!rowb) {
5648     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5649   } else {
5650     *rowb = isrowb;
5651   }
5652   if (!colb) {
5653     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5654   } else {
5655     *colb = iscolb;
5656   }
5657   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5658   PetscFunctionReturn(0);
5659 }
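/*
   Usage sketch (hypothetical helper, kept under "#if 0" for illustration only): the index sets
   returned by the first call are kept and handed back for the MAT_REUSE_MATRIX update.
*/
#if 0
static PetscErrorCode ExampleGetBrowsOfAcols(Mat A,Mat B)
{
  Mat            B_seq = NULL;
  IS             rowb  = NULL,colb = NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
  /* ... values of B change, nonzero pattern unchanged ... */
  ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
  ierr = ISDestroy(&rowb);CHKERRQ(ierr);
  ierr = ISDestroy(&colb);CHKERRQ(ierr);
  ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif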
5660 
5661 /*
5662     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B whose indices equal the nonzero column indices
5663     of the OFF-DIAGONAL portion of local A
5664 
5665     Collective on Mat
5666 
5667    Input Parameters:
5668 +    A,B - the matrices in mpiaij format
5669 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5670 
5671    Output Parameters:
5672 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5673 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5674 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5675 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5676 
5677     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5678      for this matrix. This is not desirable.
5679 
5680     Level: developer
5681 
5682 */
5683 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5684 {
5685   PetscErrorCode         ierr;
5686   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5687   Mat_SeqAIJ             *b_oth;
5688   VecScatter             ctx;
5689   MPI_Comm               comm;
5690   const PetscMPIInt      *rprocs,*sprocs;
5691   const PetscInt         *srow,*rstarts,*sstarts;
5692   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5693   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5694   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5695   MPI_Request            *rwaits = NULL,*swaits = NULL;
5696   MPI_Status             rstatus;
5697   PetscMPIInt            jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5698 
5699   PetscFunctionBegin;
5700   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5701   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5702 
5703   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5704     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5705   }
5706   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5707   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5708 
5709   if (size == 1) {
5710     startsj_s = NULL;
5711     bufa_ptr  = NULL;
5712     *B_oth    = NULL;
5713     PetscFunctionReturn(0);
5714   }
5715 
5716   ctx = a->Mvctx;
5717   tag = ((PetscObject)ctx)->tag;
5718 
5719   if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5720   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5721   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5722   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5723   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5724   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5725   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5726 
5727   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5728   if (scall == MAT_INITIAL_MATRIX) {
5729     /* i-array */
5730     /*---------*/
5731     /*  post receives */
5732     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5733     for (i=0; i<nrecvs; i++) {
5734       rowlen = rvalues + rstarts[i]*rbs;
5735       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5736       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5737     }
5738 
5739     /* pack the outgoing message */
5740     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5741 
5742     sstartsj[0] = 0;
5743     rstartsj[0] = 0;
5744     len         = 0; /* total length of j or a array to be sent */
5745     if (nsends) {
5746       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5747       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5748     }
5749     for (i=0; i<nsends; i++) {
5750       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5751       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5752       for (j=0; j<nrows; j++) {
5753         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5754         for (l=0; l<sbs; l++) {
5755           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5756 
5757           rowlen[j*sbs+l] = ncols;
5758 
5759           len += ncols;
5760           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5761         }
5762         k++;
5763       }
5764       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5765 
5766       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5767     }
5768     /* recvs and sends of i-array are completed */
5769     i = nrecvs;
5770     while (i--) {
5771       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5772     }
5773     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5774     ierr = PetscFree(svalues);CHKERRQ(ierr);
5775 
5776     /* allocate buffers for sending j and a arrays */
5777     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5778     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5779 
5780     /* create i-array of B_oth */
5781     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5782 
5783     b_othi[0] = 0;
5784     len       = 0; /* total length of j or a array to be received */
5785     k         = 0;
5786     for (i=0; i<nrecvs; i++) {
5787       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5788       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5789       for (j=0; j<nrows; j++) {
5790         b_othi[k+1] = b_othi[k] + rowlen[j];
5791         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5792         k++;
5793       }
5794       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5795     }
5796     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5797 
5798     /* allocate space for j and a arrays of B_oth */
5799     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5800     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5801 
5802     /* j-array */
5803     /*---------*/
5804     /*  post receives of j-array */
5805     for (i=0; i<nrecvs; i++) {
5806       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5807       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5808     }
5809 
5810     /* pack the outgoing message j-array */
5811     if (nsends) k = sstarts[0];
5812     for (i=0; i<nsends; i++) {
5813       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5814       bufJ  = bufj+sstartsj[i];
5815       for (j=0; j<nrows; j++) {
5816         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5817         for (ll=0; ll<sbs; ll++) {
5818           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5819           for (l=0; l<ncols; l++) {
5820             *bufJ++ = cols[l];
5821           }
5822           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5823         }
5824       }
5825       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5826     }
5827 
5828     /* recvs and sends of j-array are completed */
5829     i = nrecvs;
5830     while (i--) {
5831       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5832     }
5833     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5834   } else if (scall == MAT_REUSE_MATRIX) {
5835     sstartsj = *startsj_s;
5836     rstartsj = *startsj_r;
5837     bufa     = *bufa_ptr;
5838     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5839     b_otha   = b_oth->a;
5840   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5841 
5842   /* a-array */
5843   /*---------*/
5844   /*  post receives of a-array */
5845   for (i=0; i<nrecvs; i++) {
5846     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5847     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5848   }
5849 
5850   /* pack the outgoing message a-array */
5851   if (nsends) k = sstarts[0];
5852   for (i=0; i<nsends; i++) {
5853     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5854     bufA  = bufa+sstartsj[i];
5855     for (j=0; j<nrows; j++) {
5856       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5857       for (ll=0; ll<sbs; ll++) {
5858         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5859         for (l=0; l<ncols; l++) {
5860           *bufA++ = vals[l];
5861         }
5862         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5863       }
5864     }
5865     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5866   }
5867   /* recvs and sends of a-array are completed */
5868   i = nrecvs;
5869   while (i--) {
5870     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5871   }
5872   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5873   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5874 
5875   if (scall == MAT_INITIAL_MATRIX) {
5876     /* put together the new matrix */
5877     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5878 
5879     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5880     /* Since these are PETSc arrays, change flags to free them as necessary. */
5881     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5882     b_oth->free_a  = PETSC_TRUE;
5883     b_oth->free_ij = PETSC_TRUE;
5884     b_oth->nonew   = 0;
5885 
5886     ierr = PetscFree(bufj);CHKERRQ(ierr);
5887     if (!startsj_s || !bufa_ptr) {
5888       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5889       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5890     } else {
5891       *startsj_s = sstartsj;
5892       *startsj_r = rstartsj;
5893       *bufa_ptr  = bufa;
5894     }
5895   }
5896 
5897   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5898   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5899   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5900   PetscFunctionReturn(0);
5901 }
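/*
   Usage sketch (hypothetical helper, kept under "#if 0" for illustration only): the communication
   buffers returned by the first call are passed back so MAT_REUSE_MATRIX only refreshes the
   numerical values of B_oth; they are allocated with PetscMalloc2()/PetscMalloc1() above, so the
   caller is assumed to release them with PetscFree2()/PetscFree() when done.
*/
#if 0
static PetscErrorCode ExampleGetBrowsOfAoCols(Mat A,Mat B)
{
  Mat            B_oth = NULL;
  PetscInt       *startsj_s = NULL,*startsj_r = NULL;
  MatScalar      *bufa = NULL;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
  /* ... values of B change, nonzero pattern unchanged ... */
  ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
  ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
  ierr = PetscFree(bufa);CHKERRQ(ierr);
  ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif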
5902 
5903 /*@C
5904   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5905 
5906   Not Collective
5907 
5908   Input Parameter:
5909 . A - The matrix in mpiaij format
5910 
5911   Output Parameters:
5912 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5913 . colmap - A map from global column index to local index into lvec
5914 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5915 
5916   Level: developer
5917 
5918 @*/
5919 #if defined(PETSC_USE_CTABLE)
5920 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5921 #else
5922 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5923 #endif
5924 {
5925   Mat_MPIAIJ *a;
5926 
5927   PetscFunctionBegin;
5928   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5929   PetscValidPointer(lvec, 2);
5930   PetscValidPointer(colmap, 3);
5931   PetscValidPointer(multScatter, 4);
5932   a = (Mat_MPIAIJ*) A->data;
5933   if (lvec) *lvec = a->lvec;
5934   if (colmap) *colmap = a->colmap;
5935   if (multScatter) *multScatter = a->Mvctx;
5936   PetscFunctionReturn(0);
5937 }
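/*
   Usage sketch (hypothetical helper, kept under "#if 0" for illustration only): the returned
   objects are the matrix's own internal structures, so the caller must not destroy them. The
   colmap type follows the PETSC_USE_CTABLE configuration, as in the prototype above.
*/
#if 0
static PetscErrorCode ExampleGetCommunicationStructs(Mat A)
{
  Vec            lvec;
  VecScatter     Mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable     colmap;
#else
  PetscInt       *colmap;
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
  /* lvec holds the off-process entries gathered by Mvctx during MatMult() */
  PetscFunctionReturn(0);
}
#endif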
5938 
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5942 #if defined(PETSC_HAVE_MKL_SPARSE)
5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5944 #endif
5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5947 #if defined(PETSC_HAVE_ELEMENTAL)
5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5949 #endif
5950 #if defined(PETSC_HAVE_SCALAPACK)
5951 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5952 #endif
5953 #if defined(PETSC_HAVE_HYPRE)
5954 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5955 #endif
5956 #if defined(PETSC_HAVE_CUDA)
5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5958 #endif
5959 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5961 #endif
5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5963 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5964 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5965 
5966 /*
5967     Computes C = (B'*A')' since computing C = A*B directly is untenable
5968 
5969                n                       p                          p
5970         [             ]       [             ]         [                 ]
5971       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5972         [             ]       [             ]         [                 ]
5973 
5974 */
5975 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5976 {
5977   PetscErrorCode ierr;
5978   Mat            At,Bt,Ct;
5979 
5980   PetscFunctionBegin;
5981   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5982   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5983   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5984   ierr = MatDestroy(&At);CHKERRQ(ierr);
5985   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5986   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5987   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5988   PetscFunctionReturn(0);
5989 }
5990 
5991 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5992 {
5993   PetscErrorCode ierr;
5994   PetscBool      cisdense;
5995 
5996   PetscFunctionBegin;
5997   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D\n",A->cmap->n,B->rmap->n);
5998   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5999   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6000   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6001   if (!cisdense) {
6002     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6003   }
6004   ierr = MatSetUp(C);CHKERRQ(ierr);
6005 
6006   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6007   PetscFunctionReturn(0);
6008 }
6009 
6010 /* ----------------------------------------------------------------*/
6011 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6012 {
6013   Mat_Product *product = C->product;
6014   Mat         A = product->A,B=product->B;
6015 
6016   PetscFunctionBegin;
6017   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6018     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6019 
6020   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6021   C->ops->productsymbolic = MatProductSymbolic_AB;
6022   PetscFunctionReturn(0);
6023 }
6024 
6025 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6026 {
6027   PetscErrorCode ierr;
6028   Mat_Product    *product = C->product;
6029 
6030   PetscFunctionBegin;
6031   if (product->type == MATPRODUCT_AB) {
6032     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6033   }
6034   PetscFunctionReturn(0);
6035 }
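/*
   Usage sketch (hypothetical helper, kept under "#if 0" for illustration only): a MATMPIDENSE
   matrix A times a MATMPIAIJ matrix B dispatches through the symbolic/numeric kernels above.
*/
#if 0
static PetscErrorCode ExampleDenseTimesAIJ(Mat A,Mat B)
{
  Mat            C;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  /* ... values of A or B change, sizes unchanged ... */
  ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif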
6036 /* ----------------------------------------------------------------*/
6037 
6038 /*MC
6039    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6040 
6041    Options Database Keys:
6042 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6043 
6044    Level: beginner
6045 
6046    Notes:
6047     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6048     in this case the values associated with the rows and columns one passes in are set to zero
6049     in the matrix
6050 
6051     MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6052     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6053 
6054 .seealso: MatCreateAIJ()
6055 M*/
6056 
6057 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6058 {
6059   Mat_MPIAIJ     *b;
6060   PetscErrorCode ierr;
6061   PetscMPIInt    size;
6062 
6063   PetscFunctionBegin;
6064   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6065 
6066   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6067   B->data       = (void*)b;
6068   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6069   B->assembled  = PETSC_FALSE;
6070   B->insertmode = NOT_SET_VALUES;
6071   b->size       = size;
6072 
6073   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6074 
6075   /* build cache for off array entries formed */
6076   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6077 
6078   b->donotstash  = PETSC_FALSE;
6079   b->colmap      = NULL;
6080   b->garray      = NULL;
6081   b->roworiented = PETSC_TRUE;
6082 
6083   /* stuff used for matrix vector multiply */
6084   b->lvec  = NULL;
6085   b->Mvctx = NULL;
6086 
6087   /* stuff for MatGetRow() */
6088   b->rowindices   = NULL;
6089   b->rowvalues    = NULL;
6090   b->getrowactive = PETSC_FALSE;
6091 
6092   /* flexible pointer used in CUSP/CUSPARSE classes */
6093   b->spptr = NULL;
6094 
6095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6105 #if defined(PETSC_HAVE_CUDA)
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6107 #endif
6108 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6110 #endif
6111 #if defined(PETSC_HAVE_MKL_SPARSE)
6112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6113 #endif
6114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6116   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6117 #if defined(PETSC_HAVE_ELEMENTAL)
6118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6119 #endif
6120 #if defined(PETSC_HAVE_SCALAPACK)
6121   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6122 #endif
6123   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6124   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6125 #if defined(PETSC_HAVE_HYPRE)
6126   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6127   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6128 #endif
6129   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6130   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6131   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6132   PetscFunctionReturn(0);
6133 }
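
/*
   Illustrative sketch (not part of the PETSc source): the PetscObjectComposeFunction() calls in
   MatCreate_MPIAIJ() above register type-specific conversion routines that MatConvert() looks up
   by name at run time. The helper name ConvertExample_Sketch is hypothetical; MatConvert() is the
   standard public API.
*/
PETSC_UNUSED static PetscErrorCode ConvertExample_Sketch(Mat A)
{
  PetscErrorCode ierr;
  Mat            B;

  PetscFunctionBegin;
  /* A is assumed to be an assembled MATMPIAIJ matrix; this resolves the "MatConvert_mpiaij_mpibaij_C" routine composed above */
  ierr = MatConvert(A,MATMPIBAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}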
6134 
6135 /*@C
6136      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
6137          and "off-diagonal" part of the matrix in CSR format.
6138 
6139    Collective
6140 
6141    Input Parameters:
6142 +  comm - MPI communicator
6143 .  m - number of local rows (Cannot be PETSC_DECIDE)
6144 .  n - This value should be the same as the local size used in creating the
6145        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
6146        it calculated if N is given). For square matrices n is almost always m.
6147 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6148 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6149 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6150 .   j - column indices
6151 .   a - matrix values
6152 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6153 .   oj - column indices
6154 -   oa - matrix values
6155 
6156    Output Parameter:
6157 .   mat - the matrix
6158 
6159    Level: advanced
6160 
6161    Notes:
6162        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6163        must free these arrays once the matrix has been destroyed, and not before.
6164 
6165        The i, j, oi, and oj indices are 0 based; the j indices are local to the "diagonal" block, while the oj indices are global column indices
6166 
6167        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6168 
6169        This sets local rows and cannot be used to set off-processor values.
6170 
6171        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6172        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6173        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6174        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6175        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6176        communication if it is known that only local entries will be set.
6177 
6178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6179           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6180 @*/
6181 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6182 {
6183   PetscErrorCode ierr;
6184   Mat_MPIAIJ     *maij;
6185 
6186   PetscFunctionBegin;
6187   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6188   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6189   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6190   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6191   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6192   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6193   maij = (Mat_MPIAIJ*) (*mat)->data;
6194 
6195   (*mat)->preallocated = PETSC_TRUE;
6196 
6197   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6198   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6199 
6200   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6201   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6202 
6203   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6204   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6205   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6206   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6207 
6208   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6209   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6210   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6211   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6212   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6213   PetscFunctionReturn(0);
6214 }
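
/*
   Illustrative sketch (not part of the PETSc source): calling MatCreateMPIAIJWithSplitArrays() with
   hand-built split CSR arrays. Each process owns 2 rows and 2 columns; the "diagonal" block uses
   LOCAL column indices and the "off-diagonal" block uses GLOBAL column indices. The helper name
   SplitArraysExample_Sketch is hypothetical.
*/
PETSC_UNUSED static PetscErrorCode SplitArraysExample_Sketch(MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  Mat            A;
  PetscInt       di[3] = {0,1,2},dj[2] = {0,1};   /* diagonal block: one entry per row, local column indices */
  PetscScalar    da[2] = {2.0,2.0};
  PetscInt       oi[3],oj[1] = {0};               /* off-diagonal block: at most one entry, global column index */
  PetscScalar    oa[1] = {-1.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* couple local row 0 to one column owned by the next process; the last process has no off-diagonal entries */
  if (rank < size-1) {oi[0] = 0; oi[1] = 1; oi[2] = 1; oj[0] = 2*(rank+1);}
  else               {oi[0] = 0; oi[1] = 0; oi[2] = 0;}
  ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_(comm));CHKERRQ(ierr);
  /* the arrays are not copied, so destroy the matrix before they go out of scope */
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}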
6215 
6216 /*
6217     Special version for direct calls from Fortran
6218 */
6219 #include <petsc/private/fortranimpl.h>
6220 
6221 /* Change these macros so they can be used in a void function */
6222 #undef CHKERRQ
6223 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6224 #undef SETERRQ2
6225 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6226 #undef SETERRQ3
6227 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6228 #undef SETERRQ
6229 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6230 
6231 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6232 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6233 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6234 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6235 #else
6236 #endif
6237 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6238 {
6239   Mat            mat  = *mmat;
6240   PetscInt       m    = *mm, n = *mn;
6241   InsertMode     addv = *maddv;
6242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6243   PetscScalar    value;
6244   PetscErrorCode ierr;
6245 
6246   MatCheckPreallocated(mat,1);
6247   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6248   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6249   {
6250     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6251     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6252     PetscBool roworiented = aij->roworiented;
6253 
6254     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
6255     Mat        A                    = aij->A;
6256     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6257     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6258     MatScalar  *aa                  = a->a;
6259     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6260     Mat        B                    = aij->B;
6261     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6262     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6263     MatScalar  *ba                  = b->a;
6264     /* The variable below is needed only in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6265      * cannot use "#if defined" inside a macro. */
6266     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6267 
6268     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6269     PetscInt  nonew = a->nonew;
6270     MatScalar *ap1,*ap2;
6271 
6272     PetscFunctionBegin;
6273     for (i=0; i<m; i++) {
6274       if (im[i] < 0) continue;
6275       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6276       if (im[i] >= rstart && im[i] < rend) {
6277         row      = im[i] - rstart;
6278         lastcol1 = -1;
6279         rp1      = aj + ai[row];
6280         ap1      = aa + ai[row];
6281         rmax1    = aimax[row];
6282         nrow1    = ailen[row];
6283         low1     = 0;
6284         high1    = nrow1;
6285         lastcol2 = -1;
6286         rp2      = bj + bi[row];
6287         ap2      = ba + bi[row];
6288         rmax2    = bimax[row];
6289         nrow2    = bilen[row];
6290         low2     = 0;
6291         high2    = nrow2;
6292 
6293         for (j=0; j<n; j++) {
6294           if (roworiented) value = v[i*n+j];
6295           else value = v[i+j*m];
6296           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6297           if (in[j] >= cstart && in[j] < cend) {
6298             col = in[j] - cstart;
6299             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6300 #if defined(PETSC_HAVE_DEVICE)
6301             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6302 #endif
6303           } else if (in[j] < 0) continue;
6304           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6305             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6306             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6307           } else {
6308             if (mat->was_assembled) {
6309               if (!aij->colmap) {
6310                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6311               }
6312 #if defined(PETSC_USE_CTABLE)
6313               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6314               col--;
6315 #else
6316               col = aij->colmap[in[j]] - 1;
6317 #endif
6318               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6319                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6320                 col  =  in[j];
6321                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6322                 B        = aij->B;
6323                 b        = (Mat_SeqAIJ*)B->data;
6324                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6325                 rp2      = bj + bi[row];
6326                 ap2      = ba + bi[row];
6327                 rmax2    = bimax[row];
6328                 nrow2    = bilen[row];
6329                 low2     = 0;
6330                 high2    = nrow2;
6331                 bm       = aij->B->rmap->n;
6332                 ba       = b->a;
6333                 inserted = PETSC_FALSE;
6334               }
6335             } else col = in[j];
6336             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6337 #if defined(PETSC_HAVE_DEVICE)
6338             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6339 #endif
6340           }
6341         }
6342       } else if (!aij->donotstash) {
6343         if (roworiented) {
6344           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6345         } else {
6346           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6347         }
6348       }
6349     }
6350   }
6351   PetscFunctionReturnVoid();
6352 }
6353