1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode    ierr;
15   PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
16   PetscScalar       *va,*vv;
17   Vec               vB,vA;
18   const PetscScalar *vb;
19 
20   PetscFunctionBegin;
21   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
22   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
23 
24   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
25   if (idx) {
26     for (i=0; i<m; i++) {
27       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
28     }
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
32   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
33   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
34 
35   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
36   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
37   for (i=0; i<m; i++) {
38     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
39       vv[i] = vb[i];
40       if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
41     } else {
42       vv[i] = va[i];
43       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
44         idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
45     }
46   }
47   ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
48   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
49   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
50   ierr = PetscFree(idxb);CHKERRQ(ierr);
51   ierr = VecDestroy(&vA);CHKERRQ(ierr);
52   ierr = VecDestroy(&vB);CHKERRQ(ierr);
53   PetscFunctionReturn(0);
54 }
55 
MatStoreValues_MPIBAIJ(Mat mat)56 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
57 {
58   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
59   PetscErrorCode ierr;
60 
61   PetscFunctionBegin;
62   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
63   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
MatRetrieveValues_MPIBAIJ(Mat mat)67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non scalable way since the
82    length of colmap equals the global matrix length.
83 */
MatCreateColmap_MPIBAIJ_Private(Mat mat)84 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
85 {
86   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
87   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
88   PetscErrorCode ierr;
89   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
90 
91   PetscFunctionBegin;
92 #if defined(PETSC_USE_CTABLE)
93   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
94   for (i=0; i<nbs; i++) {
95     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
96   }
97 #else
98   ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
99   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
101 #endif
102   PetscFunctionReturn(0);
103 }
104 
105 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
106   { \
107     brow = row/bs;  \
108     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
109     rmax = aimax[brow]; nrow = ailen[brow]; \
110     bcol = col/bs; \
111     ridx = row % bs; cidx = col % bs; \
112     low  = 0; high = nrow; \
113     while (high-low > 3) { \
114       t = (low+high)/2; \
115       if (rp[t] > bcol) high = t; \
116       else              low  = t; \
117     } \
118     for (_i=low; _i<high; _i++) { \
119       if (rp[_i] > bcol) break; \
120       if (rp[_i] == bcol) { \
121         bap = ap +  bs2*_i + bs*cidx + ridx; \
122         if (addv == ADD_VALUES) *bap += value;  \
123         else                    *bap  = value;  \
124         goto a_noinsert; \
125       } \
126     } \
127     if (a->nonew == 1) goto a_noinsert; \
128     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
129     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
130     N = nrow++ - 1;  \
131     /* shift up all the later entries in this row */ \
132     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
133     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
134     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
135     rp[_i]                      = bcol;  \
136     ap[bs2*_i + bs*cidx + ridx] = value;  \
137 a_noinsert:; \
138     ailen[brow] = nrow; \
139   }
140 
141 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
142   { \
143     brow = row/bs;  \
144     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
145     rmax = bimax[brow]; nrow = bilen[brow]; \
146     bcol = col/bs; \
147     ridx = row % bs; cidx = col % bs; \
148     low  = 0; high = nrow; \
149     while (high-low > 3) { \
150       t = (low+high)/2; \
151       if (rp[t] > bcol) high = t; \
152       else              low  = t; \
153     } \
154     for (_i=low; _i<high; _i++) { \
155       if (rp[_i] > bcol) break; \
156       if (rp[_i] == bcol) { \
157         bap = ap +  bs2*_i + bs*cidx + ridx; \
158         if (addv == ADD_VALUES) *bap += value;  \
159         else                    *bap  = value;  \
160         goto b_noinsert; \
161       } \
162     } \
163     if (b->nonew == 1) goto b_noinsert; \
164     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
165     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
166     N = nrow++ - 1;  \
167     /* shift up all the later entries in this row */ \
168     ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
169     ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
170     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
171     rp[_i]                      = bcol;  \
172     ap[bs2*_i + bs*cidx + ridx] = value;  \
173 b_noinsert:; \
174     bilen[brow] = nrow; \
175   }
176 
MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)177 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
178 {
179   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
180   MatScalar      value;
181   PetscBool      roworiented = baij->roworiented;
182   PetscErrorCode ierr;
183   PetscInt       i,j,row,col;
184   PetscInt       rstart_orig=mat->rmap->rstart;
185   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
186   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
187 
188   /* Some Variables required in the macro */
189   Mat         A     = baij->A;
190   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
191   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
192   MatScalar   *aa   =a->a;
193 
194   Mat         B     = baij->B;
195   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
196   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
197   MatScalar   *ba   =b->a;
198 
199   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
200   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
201   MatScalar *ap,*bap;
202 
203   PetscFunctionBegin;
204   for (i=0; i<m; i++) {
205     if (im[i] < 0) continue;
206     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
207     if (im[i] >= rstart_orig && im[i] < rend_orig) {
208       row = im[i] - rstart_orig;
209       for (j=0; j<n; j++) {
210         if (in[j] >= cstart_orig && in[j] < cend_orig) {
211           col = in[j] - cstart_orig;
212           if (roworiented) value = v[i*n+j];
213           else             value = v[i+j*m];
214           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
215         } else if (in[j] < 0) continue;
216         else if (PetscUnlikely(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
217         else {
218           if (mat->was_assembled) {
219             if (!baij->colmap) {
220               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
221             }
222 #if defined(PETSC_USE_CTABLE)
223             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
224             col  = col - 1;
225 #else
226             col = baij->colmap[in[j]/bs] - 1;
227 #endif
228             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
229               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
230               col  =  in[j];
231               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
232               B    = baij->B;
233               b    = (Mat_SeqBAIJ*)(B)->data;
234               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
235               ba   =b->a;
236             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
237             else col += in[j]%bs;
238           } else col = in[j];
239           if (roworiented) value = v[i*n+j];
240           else             value = v[i+j*m];
241           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
242           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
243         }
244       }
245     } else {
246       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
247       if (!baij->donotstash) {
248         mat->assembled = PETSC_FALSE;
249         if (roworiented) {
250           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
251         } else {
252           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
253         }
254       }
255     }
256   }
257   PetscFunctionReturn(0);
258 }
259 
MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)260 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
261 {
262   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
263   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
264   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
265   PetscErrorCode    ierr;
266   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
267   PetscBool         roworiented=a->roworiented;
268   const PetscScalar *value     = v;
269   MatScalar         *ap,*aa = a->a,*bap;
270 
271   PetscFunctionBegin;
272   rp   = aj + ai[row];
273   ap   = aa + bs2*ai[row];
274   rmax = imax[row];
275   nrow = ailen[row];
276   value = v;
277   low = 0;
278   high = nrow;
279   while (high-low > 7) {
280     t = (low+high)/2;
281     if (rp[t] > col) high = t;
282     else             low  = t;
283   }
284   for (i=low; i<high; i++) {
285     if (rp[i] > col) break;
286     if (rp[i] == col) {
287       bap = ap +  bs2*i;
288       if (roworiented) {
289         if (is == ADD_VALUES) {
290           for (ii=0; ii<bs; ii++) {
291             for (jj=ii; jj<bs2; jj+=bs) {
292               bap[jj] += *value++;
293             }
294           }
295         } else {
296           for (ii=0; ii<bs; ii++) {
297             for (jj=ii; jj<bs2; jj+=bs) {
298               bap[jj] = *value++;
299             }
300           }
301         }
302       } else {
303         if (is == ADD_VALUES) {
304           for (ii=0; ii<bs; ii++,value+=bs) {
305             for (jj=0; jj<bs; jj++) {
306               bap[jj] += value[jj];
307             }
308             bap += bs;
309           }
310         } else {
311           for (ii=0; ii<bs; ii++,value+=bs) {
312             for (jj=0; jj<bs; jj++) {
313               bap[jj]  = value[jj];
314             }
315             bap += bs;
316           }
317         }
318       }
319       goto noinsert2;
320     }
321   }
322   if (nonew == 1) goto noinsert2;
323   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
324   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
325   N = nrow++ - 1; high++;
326   /* shift up all the later entries in this row */
327   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
328   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
329   rp[i] = col;
330   bap   = ap +  bs2*i;
331   if (roworiented) {
332     for (ii=0; ii<bs; ii++) {
333       for (jj=ii; jj<bs2; jj+=bs) {
334         bap[jj] = *value++;
335       }
336     }
337   } else {
338     for (ii=0; ii<bs; ii++) {
339       for (jj=0; jj<bs; jj++) {
340         *bap++ = *value++;
341       }
342     }
343   }
344   noinsert2:;
345   ailen[row] = nrow;
346   PetscFunctionReturn(0);
347 }
348 
349 /*
350     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
351     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
352 */
MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)353 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
354 {
355   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
356   const PetscScalar *value;
357   MatScalar         *barray     = baij->barray;
358   PetscBool         roworiented = baij->roworiented;
359   PetscErrorCode    ierr;
360   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
361   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
362   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
363 
364   PetscFunctionBegin;
365   if (!barray) {
366     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
367     baij->barray = barray;
368   }
369 
370   if (roworiented) stepval = (n-1)*bs;
371   else stepval = (m-1)*bs;
372 
373   for (i=0; i<m; i++) {
374     if (im[i] < 0) continue;
375     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
376     if (im[i] >= rstart && im[i] < rend) {
377       row = im[i] - rstart;
378       for (j=0; j<n; j++) {
379         /* If NumCol = 1 then a copy is not required */
380         if ((roworiented) && (n == 1)) {
381           barray = (MatScalar*)v + i*bs2;
382         } else if ((!roworiented) && (m == 1)) {
383           barray = (MatScalar*)v + j*bs2;
384         } else { /* Here a copy is required */
385           if (roworiented) {
386             value = v + (i*(stepval+bs) + j)*bs;
387           } else {
388             value = v + (j*(stepval+bs) + i)*bs;
389           }
390           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
391             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
392             barray += bs;
393           }
394           barray -= bs2;
395         }
396 
397         if (in[j] >= cstart && in[j] < cend) {
398           col  = in[j] - cstart;
399           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
400         } else if (in[j] < 0) continue;
401         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
402         else {
403           if (mat->was_assembled) {
404             if (!baij->colmap) {
405               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
406             }
407 
408 #if defined(PETSC_USE_DEBUG)
409 #if defined(PETSC_USE_CTABLE)
410             { PetscInt data;
411               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
412               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
413             }
414 #else
415             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
416 #endif
417 #endif
418 #if defined(PETSC_USE_CTABLE)
419             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
420             col  = (col - 1)/bs;
421 #else
422             col = (baij->colmap[in[j]] - 1)/bs;
423 #endif
424             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
425               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
426               col  =  in[j];
427             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
428           } else col = in[j];
429           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
430         }
431       }
432     } else {
433       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
434       if (!baij->donotstash) {
435         if (roworiented) {
436           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
437         } else {
438           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
439         }
440       }
441     }
442   }
443   PetscFunctionReturn(0);
444 }
445 
446 #define HASH_KEY 0.6180339887
447 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
448 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
449 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)450 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
451 {
452   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
453   PetscBool      roworiented = baij->roworiented;
454   PetscErrorCode ierr;
455   PetscInt       i,j,row,col;
456   PetscInt       rstart_orig=mat->rmap->rstart;
457   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
458   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
459   PetscReal      tmp;
460   MatScalar      **HD = baij->hd,value;
461   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
462 
463   PetscFunctionBegin;
464   for (i=0; i<m; i++) {
465     if (PetscDefined(USE_DEBUG)) {
466       if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
467       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
468     }
469     row = im[i];
470     if (row >= rstart_orig && row < rend_orig) {
471       for (j=0; j<n; j++) {
472         col = in[j];
473         if (roworiented) value = v[i*n+j];
474         else             value = v[i+j*m];
475         /* Look up PetscInto the Hash Table */
476         key = (row/bs)*Nbs+(col/bs)+1;
477         h1  = HASH(size,key,tmp);
478 
479 
480         idx = h1;
481         if (PetscDefined(USE_DEBUG)) {
482           insert_ct++;
483           total_ct++;
484           if (HT[idx] != key) {
485             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
486             if (idx == size) {
487               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
488               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
489             }
490           }
491         } else if (HT[idx] != key) {
492           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
493           if (idx == size) {
494             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
495             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
496           }
497         }
498         /* A HASH table entry is found, so insert the values at the correct address */
499         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
500         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
501       }
502     } else if (!baij->donotstash) {
503       if (roworiented) {
504         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
505       } else {
506         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
507       }
508     }
509   }
510   if (PetscDefined(USE_DEBUG)) {
511     baij->ht_total_ct  += total_ct;
512     baij->ht_insert_ct += insert_ct;
513   }
514   PetscFunctionReturn(0);
515 }
516 
MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)517 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
518 {
519   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
520   PetscBool         roworiented = baij->roworiented;
521   PetscErrorCode    ierr;
522   PetscInt          i,j,ii,jj,row,col;
523   PetscInt          rstart=baij->rstartbs;
524   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
525   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
526   PetscReal         tmp;
527   MatScalar         **HD = baij->hd,*baij_a;
528   const PetscScalar *v_t,*value;
529   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
530 
531   PetscFunctionBegin;
532   if (roworiented) stepval = (n-1)*bs;
533   else stepval = (m-1)*bs;
534 
535   for (i=0; i<m; i++) {
536     if (PetscDefined(USE_DEBUG)) {
537       if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
538       if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
539     }
540     row = im[i];
541     v_t = v + i*nbs2;
542     if (row >= rstart && row < rend) {
543       for (j=0; j<n; j++) {
544         col = in[j];
545 
546         /* Look up into the Hash Table */
547         key = row*Nbs+col+1;
548         h1  = HASH(size,key,tmp);
549 
550         idx = h1;
551         if (PetscDefined(USE_DEBUG)) {
552           total_ct++;
553           insert_ct++;
554           if (HT[idx] != key) {
555             for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
556             if (idx == size) {
557               for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
558               if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
559             }
560           }
561         } else if (HT[idx] != key) {
562           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
563           if (idx == size) {
564             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
565             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
566           }
567         }
568         baij_a = HD[idx];
569         if (roworiented) {
570           /*value = v + i*(stepval+bs)*bs + j*bs;*/
571           /* value = v + (i*(stepval+bs)+j)*bs; */
572           value = v_t;
573           v_t  += bs;
574           if (addv == ADD_VALUES) {
575             for (ii=0; ii<bs; ii++,value+=stepval) {
576               for (jj=ii; jj<bs2; jj+=bs) {
577                 baij_a[jj] += *value++;
578               }
579             }
580           } else {
581             for (ii=0; ii<bs; ii++,value+=stepval) {
582               for (jj=ii; jj<bs2; jj+=bs) {
583                 baij_a[jj] = *value++;
584               }
585             }
586           }
587         } else {
588           value = v + j*(stepval+bs)*bs + i*bs;
589           if (addv == ADD_VALUES) {
590             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
591               for (jj=0; jj<bs; jj++) {
592                 baij_a[jj] += *value++;
593               }
594             }
595           } else {
596             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
597               for (jj=0; jj<bs; jj++) {
598                 baij_a[jj] = *value++;
599               }
600             }
601           }
602         }
603       }
604     } else {
605       if (!baij->donotstash) {
606         if (roworiented) {
607           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
608         } else {
609           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
610         }
611       }
612     }
613   }
614   if (PetscDefined(USE_DEBUG)) {
615     baij->ht_total_ct  += total_ct;
616     baij->ht_insert_ct += insert_ct;
617   }
618   PetscFunctionReturn(0);
619 }
620 
MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])621 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
622 {
623   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
624   PetscErrorCode ierr;
625   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
626   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
627 
628   PetscFunctionBegin;
629   for (i=0; i<m; i++) {
630     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
631     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
632     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
633       row = idxm[i] - bsrstart;
634       for (j=0; j<n; j++) {
635         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
636         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
637         if (idxn[j] >= bscstart && idxn[j] < bscend) {
638           col  = idxn[j] - bscstart;
639           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
640         } else {
641           if (!baij->colmap) {
642             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
643           }
644 #if defined(PETSC_USE_CTABLE)
645           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
646           data--;
647 #else
648           data = baij->colmap[idxn[j]/bs]-1;
649 #endif
650           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
651           else {
652             col  = data + idxn[j]%bs;
653             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
654           }
655         }
656       }
657     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
658   }
659   PetscFunctionReturn(0);
660 }
661 
MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal * nrm)662 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
663 {
664   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
665   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
666   PetscErrorCode ierr;
667   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
668   PetscReal      sum = 0.0;
669   MatScalar      *v;
670 
671   PetscFunctionBegin;
672   if (baij->size == 1) {
673     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
674   } else {
675     if (type == NORM_FROBENIUS) {
676       v  = amat->a;
677       nz = amat->nz*bs2;
678       for (i=0; i<nz; i++) {
679         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
680       }
681       v  = bmat->a;
682       nz = bmat->nz*bs2;
683       for (i=0; i<nz; i++) {
684         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
685       }
686       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
687       *nrm = PetscSqrtReal(*nrm);
688     } else if (type == NORM_1) { /* max column sum */
689       PetscReal *tmp,*tmp2;
690       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
691       ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
692       ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
693       v    = amat->a; jj = amat->j;
694       for (i=0; i<amat->nz; i++) {
695         for (j=0; j<bs; j++) {
696           col = bs*(cstart + *jj) + j; /* column index */
697           for (row=0; row<bs; row++) {
698             tmp[col] += PetscAbsScalar(*v);  v++;
699           }
700         }
701         jj++;
702       }
703       v = bmat->a; jj = bmat->j;
704       for (i=0; i<bmat->nz; i++) {
705         for (j=0; j<bs; j++) {
706           col = bs*garray[*jj] + j;
707           for (row=0; row<bs; row++) {
708             tmp[col] += PetscAbsScalar(*v); v++;
709           }
710         }
711         jj++;
712       }
713       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714       *nrm = 0.0;
715       for (j=0; j<mat->cmap->N; j++) {
716         if (tmp2[j] > *nrm) *nrm = tmp2[j];
717       }
718       ierr = PetscFree(tmp);CHKERRQ(ierr);
719       ierr = PetscFree(tmp2);CHKERRQ(ierr);
720     } else if (type == NORM_INFINITY) { /* max row sum */
721       PetscReal *sums;
722       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
723       sum  = 0.0;
724       for (j=0; j<amat->mbs; j++) {
725         for (row=0; row<bs; row++) sums[row] = 0.0;
726         v  = amat->a + bs2*amat->i[j];
727         nz = amat->i[j+1]-amat->i[j];
728         for (i=0; i<nz; i++) {
729           for (col=0; col<bs; col++) {
730             for (row=0; row<bs; row++) {
731               sums[row] += PetscAbsScalar(*v); v++;
732             }
733           }
734         }
735         v  = bmat->a + bs2*bmat->i[j];
736         nz = bmat->i[j+1]-bmat->i[j];
737         for (i=0; i<nz; i++) {
738           for (col=0; col<bs; col++) {
739             for (row=0; row<bs; row++) {
740               sums[row] += PetscAbsScalar(*v); v++;
741             }
742           }
743         }
744         for (row=0; row<bs; row++) {
745           if (sums[row] > sum) sum = sums[row];
746         }
747       }
748       ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
749       ierr = PetscFree(sums);CHKERRQ(ierr);
750     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
751   }
752   PetscFunctionReturn(0);
753 }
754 
755 /*
756   Creates the hash table, and sets the table
757   This table is created only once.
758   If new entried need to be added to the matrix
759   then the hash table has to be destroyed and
760   recreated.
761 */
MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)762 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
763 {
764   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
765   Mat            A     = baij->A,B=baij->B;
766   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
767   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
768   PetscErrorCode ierr;
769   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
770   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
771   PetscInt       *HT,key;
772   MatScalar      **HD;
773   PetscReal      tmp;
774 #if defined(PETSC_USE_INFO)
775   PetscInt ct=0,max=0;
776 #endif
777 
778   PetscFunctionBegin;
779   if (baij->ht) PetscFunctionReturn(0);
780 
781   baij->ht_size = (PetscInt)(factor*nz);
782   ht_size       = baij->ht_size;
783 
784   /* Allocate Memory for Hash Table */
785   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
786   HD   = baij->hd;
787   HT   = baij->ht;
788 
789   /* Loop Over A */
790   for (i=0; i<a->mbs; i++) {
791     for (j=ai[i]; j<ai[i+1]; j++) {
792       row = i+rstart;
793       col = aj[j]+cstart;
794 
795       key = row*Nbs + col + 1;
796       h1  = HASH(ht_size,key,tmp);
797       for (k=0; k<ht_size; k++) {
798         if (!HT[(h1+k)%ht_size]) {
799           HT[(h1+k)%ht_size] = key;
800           HD[(h1+k)%ht_size] = a->a + j*bs2;
801           break;
802 #if defined(PETSC_USE_INFO)
803         } else {
804           ct++;
805 #endif
806         }
807       }
808 #if defined(PETSC_USE_INFO)
809       if (k> max) max = k;
810 #endif
811     }
812   }
813   /* Loop Over B */
814   for (i=0; i<b->mbs; i++) {
815     for (j=bi[i]; j<bi[i+1]; j++) {
816       row = i+rstart;
817       col = garray[bj[j]];
818       key = row*Nbs + col + 1;
819       h1  = HASH(ht_size,key,tmp);
820       for (k=0; k<ht_size; k++) {
821         if (!HT[(h1+k)%ht_size]) {
822           HT[(h1+k)%ht_size] = key;
823           HD[(h1+k)%ht_size] = b->a + j*bs2;
824           break;
825 #if defined(PETSC_USE_INFO)
826         } else {
827           ct++;
828 #endif
829         }
830       }
831 #if defined(PETSC_USE_INFO)
832       if (k> max) max = k;
833 #endif
834     }
835   }
836 
837   /* Print Summary */
838 #if defined(PETSC_USE_INFO)
839   for (i=0,j=0; i<ht_size; i++) {
840     if (HT[i]) j++;
841   }
842   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
843 #endif
844   PetscFunctionReturn(0);
845 }
846 
MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)847 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
848 {
849   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
850   PetscErrorCode ierr;
851   PetscInt       nstash,reallocs;
852 
853   PetscFunctionBegin;
854   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
855 
856   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
857   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
858   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
859   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
860   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
861   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)865 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
866 {
867   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
868   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
869   PetscErrorCode ierr;
870   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
871   PetscInt       *row,*col;
872   PetscBool      r1,r2,r3,other_disassembled;
873   MatScalar      *val;
874   PetscMPIInt    n;
875 
876   PetscFunctionBegin;
877   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
878   if (!baij->donotstash && !mat->nooffprocentries) {
879     while (1) {
880       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
881       if (!flg) break;
882 
883       for (i=0; i<n;) {
884         /* Now identify the consecutive vals belonging to the same row */
885         for (j=i,rstart=row[j]; j<n; j++) {
886           if (row[j] != rstart) break;
887         }
888         if (j < n) ncols = j-i;
889         else       ncols = n-i;
890         /* Now assemble all these values with a single function call */
891         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
892         i    = j;
893       }
894     }
895     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
896     /* Now process the block-stash. Since the values are stashed column-oriented,
897        set the roworiented flag to column oriented, and after MatSetValues()
898        restore the original flags */
899     r1 = baij->roworiented;
900     r2 = a->roworiented;
901     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
902 
903     baij->roworiented = PETSC_FALSE;
904     a->roworiented    = PETSC_FALSE;
905 
906     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
907     while (1) {
908       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
909       if (!flg) break;
910 
911       for (i=0; i<n;) {
912         /* Now identify the consecutive vals belonging to the same row */
913         for (j=i,rstart=row[j]; j<n; j++) {
914           if (row[j] != rstart) break;
915         }
916         if (j < n) ncols = j-i;
917         else       ncols = n-i;
918         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
919         i    = j;
920       }
921     }
922     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
923 
924     baij->roworiented = r1;
925     a->roworiented    = r2;
926 
927     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
928   }
929 
930   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
932 
933   /* determine if any processor has disassembled, if so we must
934      also disassemble ourselfs, in order that we may reassemble. */
935   /*
936      if nonzero structure of submatrix B cannot change then we know that
937      no processor disassembled thus we can skip this stuff
938   */
939   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
940     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
941     if (mat->was_assembled && !other_disassembled) {
942       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
943     }
944   }
945 
946   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
947     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
948   }
949   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
950   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
951 
952 #if defined(PETSC_USE_INFO)
953   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
954     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
955 
956     baij->ht_total_ct  = 0;
957     baij->ht_insert_ct = 0;
958   }
959 #endif
960   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
961     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
962 
963     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
964     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
965   }
966 
967   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
968 
969   baij->rowvalues = NULL;
970 
971   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
972   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
973     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
974     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
980 #include <petscdraw.h>
MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)981 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
982 {
983   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
984   PetscErrorCode    ierr;
985   PetscMPIInt       rank = baij->rank;
986   PetscInt          bs   = mat->rmap->bs;
987   PetscBool         iascii,isdraw;
988   PetscViewer       sviewer;
989   PetscViewerFormat format;
990 
991   PetscFunctionBegin;
992   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
993   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
994   if (iascii) {
995     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
996     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
997       MatInfo info;
998       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
999       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1000       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1001       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
1002                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
1003       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1004       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1005       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1006       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1007       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1008       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1009       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1010       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1011       PetscFunctionReturn(0);
1012     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1013       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1014       PetscFunctionReturn(0);
1015     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1016       PetscFunctionReturn(0);
1017     }
1018   }
1019 
1020   if (isdraw) {
1021     PetscDraw draw;
1022     PetscBool isnull;
1023     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1024     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1025     if (isnull) PetscFunctionReturn(0);
1026   }
1027 
1028   {
1029     /* assemble the entire matrix onto first processor. */
1030     Mat         A;
1031     Mat_SeqBAIJ *Aloc;
1032     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1033     MatScalar   *a;
1034     const char  *matname;
1035 
1036     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1037     /* Perhaps this should be the type of mat? */
1038     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1039     if (!rank) {
1040       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1041     } else {
1042       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1043     }
1044     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1045     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1046     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1047     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1048 
1049     /* copy over the A part */
1050     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1051     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1052     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1053 
1054     for (i=0; i<mbs; i++) {
1055       rvals[0] = bs*(baij->rstartbs + i);
1056       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1057       for (j=ai[i]; j<ai[i+1]; j++) {
1058         col = (baij->cstartbs+aj[j])*bs;
1059         for (k=0; k<bs; k++) {
1060           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1061           col++; a += bs;
1062         }
1063       }
1064     }
1065     /* copy over the B part */
1066     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1067     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1068     for (i=0; i<mbs; i++) {
1069       rvals[0] = bs*(baij->rstartbs + i);
1070       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1071       for (j=ai[i]; j<ai[i+1]; j++) {
1072         col = baij->garray[aj[j]]*bs;
1073         for (k=0; k<bs; k++) {
1074           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1075           col++; a += bs;
1076         }
1077       }
1078     }
1079     ierr = PetscFree(rvals);CHKERRQ(ierr);
1080     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1081     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1082     /*
1083        Everyone has to call to draw the matrix since the graphics waits are
1084        synchronized across all processors that share the PetscDraw object
1085     */
1086     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1087     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1088     if (!rank) {
1089       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1090       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1091     }
1092     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1093     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1094     ierr = MatDestroy(&A);CHKERRQ(ierr);
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /* Used for both MPIBAIJ and MPISBAIJ matrices */
MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)1100 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1101 {
1102   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1103   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1104   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1105   const PetscInt *garray = aij->garray;
1106   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1107   PetscInt       *rowlens,*colidxs;
1108   PetscScalar    *matvals;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1113 
1114   M  = mat->rmap->N;
1115   N  = mat->cmap->N;
1116   m  = mat->rmap->n;
1117   rs = mat->rmap->rstart;
1118   cs = mat->cmap->rstart;
1119   bs = mat->rmap->bs;
1120   nz = bs*bs*(A->nz + B->nz);
1121 
1122   /* write matrix header */
1123   header[0] = MAT_FILE_CLASSID;
1124   header[1] = M; header[2] = N; header[3] = nz;
1125   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1126   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1127 
1128   /* fill in and store row lengths */
1129   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1130   for (cnt=0, i=0; i<A->mbs; i++)
1131     for (j=0; j<bs; j++)
1132       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1133   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1134   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1135 
1136   /* fill in and store column indices */
1137   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1138   for (cnt=0, i=0; i<A->mbs; i++) {
1139     for (k=0; k<bs; k++) {
1140       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1141         if (garray[B->j[jb]] > cs/bs) break;
1142         for (l=0; l<bs; l++)
1143           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1144       }
1145       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1146         for (l=0; l<bs; l++)
1147           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1148       for (; jb<B->i[i+1]; jb++)
1149         for (l=0; l<bs; l++)
1150           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1151     }
1152   }
1153   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1154   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1155   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1156 
1157   /* fill in and store nonzero values */
1158   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1159   for (cnt=0, i=0; i<A->mbs; i++) {
1160     for (k=0; k<bs; k++) {
1161       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1162         if (garray[B->j[jb]] > cs/bs) break;
1163         for (l=0; l<bs; l++)
1164           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1165       }
1166       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1167         for (l=0; l<bs; l++)
1168           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1169       for (; jb<B->i[i+1]; jb++)
1170         for (l=0; l<bs; l++)
1171           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1172     }
1173   }
1174   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1175   ierr = PetscFree(matvals);CHKERRQ(ierr);
1176 
1177   /* write block size option to the viewer's .info file */
1178   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
MatView_MPIBAIJ(Mat mat,PetscViewer viewer)1182 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1183 {
1184   PetscErrorCode ierr;
1185   PetscBool      iascii,isdraw,issocket,isbinary;
1186 
1187   PetscFunctionBegin;
1188   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1189   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1190   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1191   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1192   if (iascii || isdraw || issocket) {
1193     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1194   } else if (isbinary) {
1195     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
MatDestroy_MPIBAIJ(Mat mat)1200 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1201 {
1202   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206 #if defined(PETSC_USE_LOG)
1207   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1208 #endif
1209   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1210   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1211   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1212   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1213 #if defined(PETSC_USE_CTABLE)
1214   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1215 #else
1216   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1217 #endif
1218   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1219   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1220   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1221   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1222   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1223   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1224   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1225   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1226 
1227   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1235   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1236 #if defined(PETSC_HAVE_HYPRE)
1237   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
1238 #endif
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)1243 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1244 {
1245   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1246   PetscErrorCode ierr;
1247   PetscInt       nt;
1248 
1249   PetscFunctionBegin;
1250   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1251   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1252   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1253   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1254   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1255   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1256   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1257   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)1261 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1262 {
1263   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1269   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1271   PetscFunctionReturn(0);
1272 }
1273 
MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)1274 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1275 {
1276   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1277   PetscErrorCode ierr;
1278 
1279   PetscFunctionBegin;
1280   /* do nondiagonal part */
1281   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1282   /* do local part */
1283   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1284   /* add partial results together */
1285   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1286   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)1290 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1291 {
1292   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1293   PetscErrorCode ierr;
1294 
1295   PetscFunctionBegin;
1296   /* do nondiagonal part */
1297   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1298   /* do local part */
1299   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1300   /* add partial results together */
1301   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1302   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 /*
1307   This only works correctly for square matrices where the subblock A->A is the
1308    diagonal block
1309 */
MatGetDiagonal_MPIBAIJ(Mat A,Vec v)1310 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1311 {
1312   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1317   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
MatScale_MPIBAIJ(Mat A,PetscScalar aa)1321 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1322 {
1323   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1328   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1332 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1335   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1336   PetscErrorCode ierr;
1337   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1338   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1339   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1340 
1341   PetscFunctionBegin;
1342   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1343   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1344   mat->getrowactive = PETSC_TRUE;
1345 
1346   if (!mat->rowvalues && (idx || v)) {
1347     /*
1348         allocate enough space to hold information from the longest row.
1349     */
1350     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1351     PetscInt    max = 1,mbs = mat->mbs,tmp;
1352     for (i=0; i<mbs; i++) {
1353       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1354       if (max < tmp) max = tmp;
1355     }
1356     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1357   }
1358   lrow = row - brstart;
1359 
1360   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1361   if (!v)   {pvA = NULL; pvB = NULL;}
1362   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1363   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1364   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1365   nztot = nzA + nzB;
1366 
1367   cmap = mat->garray;
1368   if (v  || idx) {
1369     if (nztot) {
1370       /* Sort by increasing column numbers, assuming A and B already sorted */
1371       PetscInt imark = -1;
1372       if (v) {
1373         *v = v_p = mat->rowvalues;
1374         for (i=0; i<nzB; i++) {
1375           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1376           else break;
1377         }
1378         imark = i;
1379         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1380         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1381       }
1382       if (idx) {
1383         *idx = idx_p = mat->rowindices;
1384         if (imark > -1) {
1385           for (i=0; i<imark; i++) {
1386             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1387           }
1388         } else {
1389           for (i=0; i<nzB; i++) {
1390             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1391             else break;
1392           }
1393           imark = i;
1394         }
1395         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1396         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1397       }
1398     } else {
1399       if (idx) *idx = NULL;
1400       if (v)   *v   = NULL;
1401     }
1402   }
1403   *nz  = nztot;
1404   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1405   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1409 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1410 {
1411   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1412 
1413   PetscFunctionBegin;
1414   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1415   baij->getrowactive = PETSC_FALSE;
1416   PetscFunctionReturn(0);
1417 }
1418 
MatZeroEntries_MPIBAIJ(Mat A)1419 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1420 {
1421   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1422   PetscErrorCode ierr;
1423 
1424   PetscFunctionBegin;
1425   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1426   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1427   PetscFunctionReturn(0);
1428 }
1429 
MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo * info)1430 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1431 {
1432   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1433   Mat            A  = a->A,B = a->B;
1434   PetscErrorCode ierr;
1435   PetscLogDouble isend[5],irecv[5];
1436 
1437   PetscFunctionBegin;
1438   info->block_size = (PetscReal)matin->rmap->bs;
1439 
1440   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1441 
1442   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1443   isend[3] = info->memory;  isend[4] = info->mallocs;
1444 
1445   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1446 
1447   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1448   isend[3] += info->memory;  isend[4] += info->mallocs;
1449 
1450   if (flag == MAT_LOCAL) {
1451     info->nz_used      = isend[0];
1452     info->nz_allocated = isend[1];
1453     info->nz_unneeded  = isend[2];
1454     info->memory       = isend[3];
1455     info->mallocs      = isend[4];
1456   } else if (flag == MAT_GLOBAL_MAX) {
1457     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1458 
1459     info->nz_used      = irecv[0];
1460     info->nz_allocated = irecv[1];
1461     info->nz_unneeded  = irecv[2];
1462     info->memory       = irecv[3];
1463     info->mallocs      = irecv[4];
1464   } else if (flag == MAT_GLOBAL_SUM) {
1465     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1466 
1467     info->nz_used      = irecv[0];
1468     info->nz_allocated = irecv[1];
1469     info->nz_unneeded  = irecv[2];
1470     info->memory       = irecv[3];
1471     info->mallocs      = irecv[4];
1472   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1473   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1474   info->fill_ratio_needed = 0;
1475   info->factor_mallocs    = 0;
1476   PetscFunctionReturn(0);
1477 }
1478 
MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)1479 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1480 {
1481   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1482   PetscErrorCode ierr;
1483 
1484   PetscFunctionBegin;
1485   switch (op) {
1486   case MAT_NEW_NONZERO_LOCATIONS:
1487   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1488   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1489   case MAT_KEEP_NONZERO_PATTERN:
1490   case MAT_NEW_NONZERO_LOCATION_ERR:
1491     MatCheckPreallocated(A,1);
1492     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1493     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1494     break;
1495   case MAT_ROW_ORIENTED:
1496     MatCheckPreallocated(A,1);
1497     a->roworiented = flg;
1498 
1499     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1500     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1501     break;
1502   case MAT_NEW_DIAGONALS:
1503   case MAT_SORTED_FULL:
1504     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1505     break;
1506   case MAT_IGNORE_OFF_PROC_ENTRIES:
1507     a->donotstash = flg;
1508     break;
1509   case MAT_USE_HASH_TABLE:
1510     a->ht_flag = flg;
1511     a->ht_fact = 1.39;
1512     break;
1513   case MAT_SYMMETRIC:
1514   case MAT_STRUCTURALLY_SYMMETRIC:
1515   case MAT_HERMITIAN:
1516   case MAT_SUBMAT_SINGLEIS:
1517   case MAT_SYMMETRY_ETERNAL:
1518     MatCheckPreallocated(A,1);
1519     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1520     break;
1521   default:
1522     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat * matout)1527 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1528 {
1529   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1530   Mat_SeqBAIJ    *Aloc;
1531   Mat            B;
1532   PetscErrorCode ierr;
1533   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1534   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1535   MatScalar      *a;
1536 
1537   PetscFunctionBegin;
1538   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1539     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1540     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1541     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1542     /* Do not know preallocation information, but must set block size */
1543     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1544   } else {
1545     B = *matout;
1546   }
1547 
1548   /* copy over the A part */
1549   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1550   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1551   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1552 
1553   for (i=0; i<mbs; i++) {
1554     rvals[0] = bs*(baij->rstartbs + i);
1555     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1556     for (j=ai[i]; j<ai[i+1]; j++) {
1557       col = (baij->cstartbs+aj[j])*bs;
1558       for (k=0; k<bs; k++) {
1559         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1560 
1561         col++; a += bs;
1562       }
1563     }
1564   }
1565   /* copy over the B part */
1566   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1567   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1568   for (i=0; i<mbs; i++) {
1569     rvals[0] = bs*(baij->rstartbs + i);
1570     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1571     for (j=ai[i]; j<ai[i+1]; j++) {
1572       col = baij->garray[aj[j]]*bs;
1573       for (k=0; k<bs; k++) {
1574         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1575         col++;
1576         a += bs;
1577       }
1578     }
1579   }
1580   ierr = PetscFree(rvals);CHKERRQ(ierr);
1581   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583 
1584   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1585   else {
1586     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)1591 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1592 {
1593   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1594   Mat            a     = baij->A,b = baij->B;
1595   PetscErrorCode ierr;
1596   PetscInt       s1,s2,s3;
1597 
1598   PetscFunctionBegin;
1599   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1600   if (rr) {
1601     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1602     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1603     /* Overlap communication with computation. */
1604     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605   }
1606   if (ll) {
1607     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1608     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1609     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1610   }
1611   /* scale  the diagonal block */
1612   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1613 
1614   if (rr) {
1615     /* Do a scatter end and then right scale the off-diagonal block */
1616     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1617     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1618   }
1619   PetscFunctionReturn(0);
1620 }
1621 
MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)1622 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1623 {
1624   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1625   PetscInt      *lrows;
1626   PetscInt       r, len;
1627   PetscBool      cong;
1628   PetscErrorCode ierr;
1629 
1630   PetscFunctionBegin;
1631   /* get locally owned rows */
1632   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1633   /* fix right hand side if needed */
1634   if (x && b) {
1635     const PetscScalar *xx;
1636     PetscScalar       *bb;
1637 
1638     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1639     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1640     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1641     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1642     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1643   }
1644 
1645   /* actually zap the local rows */
1646   /*
1647         Zero the required rows. If the "diagonal block" of the matrix
1648      is square and the user wishes to set the diagonal we use separate
1649      code so that MatSetValues() is not called for each diagonal allocating
1650      new memory, thus calling lots of mallocs and slowing things down.
1651 
1652   */
1653   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1654   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1655   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1656   if ((diag != 0.0) && cong) {
1657     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1658   } else if (diag != 0.0) {
1659     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1660     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1661        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1662     for (r = 0; r < len; ++r) {
1663       const PetscInt row = lrows[r] + A->rmap->rstart;
1664       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1665     }
1666     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   } else {
1669     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1670   }
1671   ierr = PetscFree(lrows);CHKERRQ(ierr);
1672 
1673   /* only change matrix nonzero state if pattern was allowed to be changed */
1674   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1675     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1676     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)1681 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1682 {
1683   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1684   PetscErrorCode    ierr;
1685   PetscMPIInt       n = A->rmap->n,p = 0;
1686   PetscInt          i,j,k,r,len = 0,row,col,count;
1687   PetscInt          *lrows,*owners = A->rmap->range;
1688   PetscSFNode       *rrows;
1689   PetscSF           sf;
1690   const PetscScalar *xx;
1691   PetscScalar       *bb,*mask;
1692   Vec               xmask,lmask;
1693   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1694   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1695   PetscScalar       *aa;
1696 
1697   PetscFunctionBegin;
1698   /* Create SF where leaves are input rows and roots are owned rows */
1699   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1700   for (r = 0; r < n; ++r) lrows[r] = -1;
1701   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1702   for (r = 0; r < N; ++r) {
1703     const PetscInt idx   = rows[r];
1704     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1705     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1706       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1707     }
1708     rrows[r].rank  = p;
1709     rrows[r].index = rows[r] - owners[p];
1710   }
1711   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1712   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1713   /* Collect flags for rows to be zeroed */
1714   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1715   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1716   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1717   /* Compress and put in row numbers */
1718   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1719   /* zero diagonal part of matrix */
1720   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1721   /* handle off diagonal part of matrix */
1722   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1723   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1724   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1725   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1726   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1727   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1729   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1730   if (x) {
1731     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1732     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1733     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1734     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1735   }
1736   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1737   /* remove zeroed rows of off diagonal matrix */
1738   for (i = 0; i < len; ++i) {
1739     row   = lrows[i];
1740     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1741     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1742     for (k = 0; k < count; ++k) {
1743       aa[0] = 0.0;
1744       aa   += bs;
1745     }
1746   }
1747   /* loop over all elements of off process part of matrix zeroing removed columns*/
1748   for (i = 0; i < l->B->rmap->N; ++i) {
1749     row = i/bs;
1750     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1751       for (k = 0; k < bs; ++k) {
1752         col = bs*baij->j[j] + k;
1753         if (PetscAbsScalar(mask[col])) {
1754           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1755           if (x) bb[i] -= aa[0]*xx[col];
1756           aa[0] = 0.0;
1757         }
1758       }
1759     }
1760   }
1761   if (x) {
1762     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1763     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1764   }
1765   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1766   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1767   ierr = PetscFree(lrows);CHKERRQ(ierr);
1768 
1769   /* only change matrix nonzero state if pattern was allowed to be changed */
1770   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1771     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1772     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1773   }
1774   PetscFunctionReturn(0);
1775 }
1776 
MatSetUnfactored_MPIBAIJ(Mat A)1777 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1778 {
1779   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1780   PetscErrorCode ierr;
1781 
1782   PetscFunctionBegin;
1783   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1788 
MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool * flag)1789 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1790 {
1791   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1792   Mat            a,b,c,d;
1793   PetscBool      flg;
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   a = matA->A; b = matA->B;
1798   c = matB->A; d = matB->B;
1799 
1800   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1801   if (flg) {
1802     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1803   }
1804   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1805   PetscFunctionReturn(0);
1806 }
1807 
MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)1808 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1809 {
1810   PetscErrorCode ierr;
1811   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1812   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1813 
1814   PetscFunctionBegin;
1815   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1816   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1817     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1818   } else {
1819     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1820     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1821   }
1822   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
MatSetUp_MPIBAIJ(Mat A)1826 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1827 {
1828   PetscErrorCode ierr;
1829 
1830   PetscFunctionBegin;
1831   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1832   PetscFunctionReturn(0);
1833 }
1834 
MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt * yltog,Mat X,const PetscInt * xltog,PetscInt * nnz)1835 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1836 {
1837   PetscErrorCode ierr;
1838   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1839   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1840   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1841 
1842   PetscFunctionBegin;
1843   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 
MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)1847 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1848 {
1849   PetscErrorCode ierr;
1850   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1851   PetscBLASInt   bnz,one=1;
1852   Mat_SeqBAIJ    *x,*y;
1853   PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;
1854 
1855   PetscFunctionBegin;
1856   if (str == SAME_NONZERO_PATTERN) {
1857     PetscScalar alpha = a;
1858     x    = (Mat_SeqBAIJ*)xx->A->data;
1859     y    = (Mat_SeqBAIJ*)yy->A->data;
1860     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1861     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1862     x    = (Mat_SeqBAIJ*)xx->B->data;
1863     y    = (Mat_SeqBAIJ*)yy->B->data;
1864     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
1865     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1866     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1867   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1868     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1869   } else {
1870     Mat      B;
1871     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1872     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1873     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1874     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1875     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1876     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1877     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1878     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
1879     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1880     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1881     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1882     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
1883     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1884     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1885     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1886     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1887   }
1888   PetscFunctionReturn(0);
1889 }
1890 
MatRealPart_MPIBAIJ(Mat A)1891 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1892 {
1893   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1894   PetscErrorCode ierr;
1895 
1896   PetscFunctionBegin;
1897   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1898   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
1901 
MatImaginaryPart_MPIBAIJ(Mat A)1902 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1903 {
1904   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1905   PetscErrorCode ierr;
1906 
1907   PetscFunctionBegin;
1908   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1909   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat * newmat)1913 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1914 {
1915   PetscErrorCode ierr;
1916   IS             iscol_local;
1917   PetscInt       csize;
1918 
1919   PetscFunctionBegin;
1920   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1921   if (call == MAT_REUSE_MATRIX) {
1922     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1923     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1924   } else {
1925     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1926   }
1927   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1928   if (call == MAT_INITIAL_MATRIX) {
1929     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1930     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1931   }
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 /*
1936   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1937   in local and then by concatenating the local matrices the end result.
1938   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1939   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1940 */
MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat * newmat)1941 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1942 {
1943   PetscErrorCode ierr;
1944   PetscMPIInt    rank,size;
1945   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1946   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1947   Mat            M,Mreuse;
1948   MatScalar      *vwork,*aa;
1949   MPI_Comm       comm;
1950   IS             isrow_new, iscol_new;
1951   Mat_SeqBAIJ    *aij;
1952 
1953   PetscFunctionBegin;
1954   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1955   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1956   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1957   /* The compression and expansion should be avoided. Doesn't point
1958      out errors, might change the indices, hence buggey */
1959   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
1960   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
1961 
1962   if (call ==  MAT_REUSE_MATRIX) {
1963     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1964     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1965     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
1966   } else {
1967     ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
1968   }
1969   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
1970   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
1971   /*
1972       m - number of local rows
1973       n - number of columns (same on all processors)
1974       rstart - first row in new global matrix generated
1975   */
1976   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1977   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1978   m    = m/bs;
1979   n    = n/bs;
1980 
1981   if (call == MAT_INITIAL_MATRIX) {
1982     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1983     ii  = aij->i;
1984     jj  = aij->j;
1985 
1986     /*
1987         Determine the number of non-zeros in the diagonal and off-diagonal
1988         portions of the matrix in order to do correct preallocation
1989     */
1990 
1991     /* first get start and end of "diagonal" columns */
1992     if (csize == PETSC_DECIDE) {
1993       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1994       if (mglobal == n*bs) { /* square matrix */
1995         nlocal = m;
1996       } else {
1997         nlocal = n/size + ((n % size) > rank);
1998       }
1999     } else {
2000       nlocal = csize/bs;
2001     }
2002     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2003     rstart = rend - nlocal;
2004     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2005 
2006     /* next, compute all the lengths */
2007     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2008     for (i=0; i<m; i++) {
2009       jend = ii[i+1] - ii[i];
2010       olen = 0;
2011       dlen = 0;
2012       for (j=0; j<jend; j++) {
2013         if (*jj < rstart || *jj >= rend) olen++;
2014         else dlen++;
2015         jj++;
2016       }
2017       olens[i] = olen;
2018       dlens[i] = dlen;
2019     }
2020     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2021     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2022     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2023     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2024     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2025     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2026   } else {
2027     PetscInt ml,nl;
2028 
2029     M    = *newmat;
2030     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2031     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2032     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2033     /*
2034          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2035        rather than the slower MatSetValues().
2036     */
2037     M->was_assembled = PETSC_TRUE;
2038     M->assembled     = PETSC_FALSE;
2039   }
2040   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2041   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2042   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2043   ii   = aij->i;
2044   jj   = aij->j;
2045   aa   = aij->a;
2046   for (i=0; i<m; i++) {
2047     row   = rstart/bs + i;
2048     nz    = ii[i+1] - ii[i];
2049     cwork = jj;     jj += nz;
2050     vwork = aa;     aa += nz*bs*bs;
2051     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2052   }
2053 
2054   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2055   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2056   *newmat = M;
2057 
2058   /* save submatrix used in processor for next request */
2059   if (call ==  MAT_INITIAL_MATRIX) {
2060     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2061     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2062   }
2063   PetscFunctionReturn(0);
2064 }
2065 
MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat * B)2066 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2067 {
2068   MPI_Comm       comm,pcomm;
2069   PetscInt       clocal_size,nrows;
2070   const PetscInt *rows;
2071   PetscMPIInt    size;
2072   IS             crowp,lcolp;
2073   PetscErrorCode ierr;
2074 
2075   PetscFunctionBegin;
2076   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2077   /* make a collective version of 'rowp' */
2078   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2079   if (pcomm==comm) {
2080     crowp = rowp;
2081   } else {
2082     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2083     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2084     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2085     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2086   }
2087   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2088   /* make a local version of 'colp' */
2089   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2090   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2091   if (size==1) {
2092     lcolp = colp;
2093   } else {
2094     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2095   }
2096   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2097   /* now we just get the submatrix */
2098   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2099   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2100   /* clean up */
2101   if (pcomm!=comm) {
2102     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2103   }
2104   if (size>1) {
2105     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2106   }
2107   PetscFunctionReturn(0);
2108 }
2109 
MatGetGhosts_MPIBAIJ(Mat mat,PetscInt * nghosts,const PetscInt * ghosts[])2110 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2111 {
2112   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2113   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2114 
2115   PetscFunctionBegin;
2116   if (nghosts) *nghosts = B->nbs;
2117   if (ghosts) *ghosts = baij->garray;
2118   PetscFunctionReturn(0);
2119 }
2120 
MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat * newmat)2121 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2122 {
2123   Mat            B;
2124   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2125   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2126   Mat_SeqAIJ     *b;
2127   PetscErrorCode ierr;
2128   PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
2129   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2130   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2131 
2132   PetscFunctionBegin;
2133   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2134   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2135 
2136   /* ----------------------------------------------------------------
2137      Tell every processor the number of nonzeros per row
2138   */
2139   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2140   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2141     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2142   }
2143   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2144   displs    = recvcounts + size;
2145   for (i=0; i<size; i++) {
2146     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2147     displs[i]     = A->rmap->range[i]/bs;
2148   }
2149 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2150   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2151 #else
2152   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2153   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2154 #endif
2155   /* ---------------------------------------------------------------
2156      Create the sequential matrix of the same type as the local block diagonal
2157   */
2158   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2159   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2160   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2161   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2162   b    = (Mat_SeqAIJ*)B->data;
2163 
2164   /*--------------------------------------------------------------------
2165     Copy my part of matrix column indices over
2166   */
2167   sendcount  = ad->nz + bd->nz;
2168   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2169   a_jsendbuf = ad->j;
2170   b_jsendbuf = bd->j;
2171   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2172   cnt        = 0;
2173   for (i=0; i<n; i++) {
2174 
2175     /* put in lower diagonal portion */
2176     m = bd->i[i+1] - bd->i[i];
2177     while (m > 0) {
2178       /* is it above diagonal (in bd (compressed) numbering) */
2179       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2180       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2181       m--;
2182     }
2183 
2184     /* put in diagonal portion */
2185     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2186       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2187     }
2188 
2189     /* put in upper diagonal portion */
2190     while (m-- > 0) {
2191       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2192     }
2193   }
2194   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2195 
2196   /*--------------------------------------------------------------------
2197     Gather all column indices to all processors
2198   */
2199   for (i=0; i<size; i++) {
2200     recvcounts[i] = 0;
2201     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2202       recvcounts[i] += lens[j];
2203     }
2204   }
2205   displs[0] = 0;
2206   for (i=1; i<size; i++) {
2207     displs[i] = displs[i-1] + recvcounts[i-1];
2208   }
2209 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2210   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2211 #else
2212   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2213 #endif
2214   /*--------------------------------------------------------------------
2215     Assemble the matrix into useable form (note numerical values not yet set)
2216   */
2217   /* set the b->ilen (length of each row) values */
2218   ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
2219   /* set the b->i indices */
2220   b->i[0] = 0;
2221   for (i=1; i<=A->rmap->N/bs; i++) {
2222     b->i[i] = b->i[i-1] + lens[i-1];
2223   }
2224   ierr = PetscFree(lens);CHKERRQ(ierr);
2225   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2226   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2227   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2228 
2229   if (A->symmetric) {
2230     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2231   } else if (A->hermitian) {
2232     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2233   } else if (A->structurally_symmetric) {
2234     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2235   }
2236   *newmat = B;
2237   PetscFunctionReturn(0);
2238 }
2239 
MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)2240 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2241 {
2242   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2243   PetscErrorCode ierr;
2244   Vec            bb1 = NULL;
2245 
2246   PetscFunctionBegin;
2247   if (flag == SOR_APPLY_UPPER) {
2248     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2249     PetscFunctionReturn(0);
2250   }
2251 
2252   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2253     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2254   }
2255 
2256   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2257     if (flag & SOR_ZERO_INITIAL_GUESS) {
2258       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2259       its--;
2260     }
2261 
2262     while (its--) {
2263       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2264       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2265 
2266       /* update rhs: bb1 = bb - B*x */
2267       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2268       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2269 
2270       /* local sweep */
2271       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2272     }
2273   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2274     if (flag & SOR_ZERO_INITIAL_GUESS) {
2275       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2276       its--;
2277     }
2278     while (its--) {
2279       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2280       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2281 
2282       /* update rhs: bb1 = bb - B*x */
2283       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2284       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2285 
2286       /* local sweep */
2287       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2288     }
2289   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2290     if (flag & SOR_ZERO_INITIAL_GUESS) {
2291       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2292       its--;
2293     }
2294     while (its--) {
2295       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2296       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2297 
2298       /* update rhs: bb1 = bb - B*x */
2299       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2300       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2301 
2302       /* local sweep */
2303       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2304     }
2305   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2306 
2307   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2308   PetscFunctionReturn(0);
2309 }
2310 
MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal * norms)2311 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2312 {
2313   PetscErrorCode ierr;
2314   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2315   PetscInt       N,i,*garray = aij->garray;
2316   PetscInt       ib,jb,bs = A->rmap->bs;
2317   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2318   MatScalar      *a_val = a_aij->a;
2319   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2320   MatScalar      *b_val = b_aij->a;
2321   PetscReal      *work;
2322 
2323   PetscFunctionBegin;
2324   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2325   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2326   if (type == NORM_2) {
2327     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2328       for (jb=0; jb<bs; jb++) {
2329         for (ib=0; ib<bs; ib++) {
2330           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2331           a_val++;
2332         }
2333       }
2334     }
2335     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2336       for (jb=0; jb<bs; jb++) {
2337         for (ib=0; ib<bs; ib++) {
2338           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2339           b_val++;
2340         }
2341       }
2342     }
2343   } else if (type == NORM_1) {
2344     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2345       for (jb=0; jb<bs; jb++) {
2346         for (ib=0; ib<bs; ib++) {
2347           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2348           a_val++;
2349         }
2350       }
2351     }
2352     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2353       for (jb=0; jb<bs; jb++) {
2354        for (ib=0; ib<bs; ib++) {
2355           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2356           b_val++;
2357         }
2358       }
2359     }
2360   } else if (type == NORM_INFINITY) {
2361     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2362       for (jb=0; jb<bs; jb++) {
2363         for (ib=0; ib<bs; ib++) {
2364           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2365           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2366           a_val++;
2367         }
2368       }
2369     }
2370     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2371       for (jb=0; jb<bs; jb++) {
2372         for (ib=0; ib<bs; ib++) {
2373           int col = garray[b_aij->j[i]] * bs + jb;
2374           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2375           b_val++;
2376         }
2377       }
2378     }
2379   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2380   if (type == NORM_INFINITY) {
2381     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2382   } else {
2383     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2384   }
2385   ierr = PetscFree(work);CHKERRQ(ierr);
2386   if (type == NORM_2) {
2387     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2388   }
2389   PetscFunctionReturn(0);
2390 }
2391 
MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar ** values)2392 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2393 {
2394   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2395   PetscErrorCode ierr;
2396 
2397   PetscFunctionBegin;
2398   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2399   A->factorerrortype             = a->A->factorerrortype;
2400   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2401   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2402   PetscFunctionReturn(0);
2403 }
2404 
MatShift_MPIBAIJ(Mat Y,PetscScalar a)2405 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2406 {
2407   PetscErrorCode ierr;
2408   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2409   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2410 
2411   PetscFunctionBegin;
2412   if (!Y->preallocated) {
2413     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2414   } else if (!aij->nz) {
2415     PetscInt nonew = aij->nonew;
2416     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2417     aij->nonew = nonew;
2418   }
2419   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool * missing,PetscInt * d)2423 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2424 {
2425   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2426   PetscErrorCode ierr;
2427 
2428   PetscFunctionBegin;
2429   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2430   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2431   if (d) {
2432     PetscInt rstart;
2433     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2434     *d += rstart/A->rmap->bs;
2435 
2436   }
2437   PetscFunctionReturn(0);
2438 }
2439 
MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat * a)2440 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2441 {
2442   PetscFunctionBegin;
2443   *a = ((Mat_MPIBAIJ*)A->data)->A;
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 /* -------------------------------------------------------------------*/
2448 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2449                                        MatGetRow_MPIBAIJ,
2450                                        MatRestoreRow_MPIBAIJ,
2451                                        MatMult_MPIBAIJ,
2452                                 /* 4*/ MatMultAdd_MPIBAIJ,
2453                                        MatMultTranspose_MPIBAIJ,
2454                                        MatMultTransposeAdd_MPIBAIJ,
2455                                        NULL,
2456                                        NULL,
2457                                        NULL,
2458                                 /*10*/ NULL,
2459                                        NULL,
2460                                        NULL,
2461                                        MatSOR_MPIBAIJ,
2462                                        MatTranspose_MPIBAIJ,
2463                                 /*15*/ MatGetInfo_MPIBAIJ,
2464                                        MatEqual_MPIBAIJ,
2465                                        MatGetDiagonal_MPIBAIJ,
2466                                        MatDiagonalScale_MPIBAIJ,
2467                                        MatNorm_MPIBAIJ,
2468                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2469                                        MatAssemblyEnd_MPIBAIJ,
2470                                        MatSetOption_MPIBAIJ,
2471                                        MatZeroEntries_MPIBAIJ,
2472                                 /*24*/ MatZeroRows_MPIBAIJ,
2473                                        NULL,
2474                                        NULL,
2475                                        NULL,
2476                                        NULL,
2477                                 /*29*/ MatSetUp_MPIBAIJ,
2478                                        NULL,
2479                                        NULL,
2480                                        MatGetDiagonalBlock_MPIBAIJ,
2481                                        NULL,
2482                                 /*34*/ MatDuplicate_MPIBAIJ,
2483                                        NULL,
2484                                        NULL,
2485                                        NULL,
2486                                        NULL,
2487                                 /*39*/ MatAXPY_MPIBAIJ,
2488                                        MatCreateSubMatrices_MPIBAIJ,
2489                                        MatIncreaseOverlap_MPIBAIJ,
2490                                        MatGetValues_MPIBAIJ,
2491                                        MatCopy_MPIBAIJ,
2492                                 /*44*/ NULL,
2493                                        MatScale_MPIBAIJ,
2494                                        MatShift_MPIBAIJ,
2495                                        NULL,
2496                                        MatZeroRowsColumns_MPIBAIJ,
2497                                 /*49*/ NULL,
2498                                        NULL,
2499                                        NULL,
2500                                        NULL,
2501                                        NULL,
2502                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2503                                        NULL,
2504                                        MatSetUnfactored_MPIBAIJ,
2505                                        MatPermute_MPIBAIJ,
2506                                        MatSetValuesBlocked_MPIBAIJ,
2507                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2508                                        MatDestroy_MPIBAIJ,
2509                                        MatView_MPIBAIJ,
2510                                        NULL,
2511                                        NULL,
2512                                 /*64*/ NULL,
2513                                        NULL,
2514                                        NULL,
2515                                        NULL,
2516                                        NULL,
2517                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2518                                        NULL,
2519                                        NULL,
2520                                        NULL,
2521                                        NULL,
2522                                 /*74*/ NULL,
2523                                        MatFDColoringApply_BAIJ,
2524                                        NULL,
2525                                        NULL,
2526                                        NULL,
2527                                 /*79*/ NULL,
2528                                        NULL,
2529                                        NULL,
2530                                        NULL,
2531                                        MatLoad_MPIBAIJ,
2532                                 /*84*/ NULL,
2533                                        NULL,
2534                                        NULL,
2535                                        NULL,
2536                                        NULL,
2537                                 /*89*/ NULL,
2538                                        NULL,
2539                                        NULL,
2540                                        NULL,
2541                                        NULL,
2542                                 /*94*/ NULL,
2543                                        NULL,
2544                                        NULL,
2545                                        NULL,
2546                                        NULL,
2547                                 /*99*/ NULL,
2548                                        NULL,
2549                                        NULL,
2550                                        NULL,
2551                                        NULL,
2552                                 /*104*/NULL,
2553                                        MatRealPart_MPIBAIJ,
2554                                        MatImaginaryPart_MPIBAIJ,
2555                                        NULL,
2556                                        NULL,
2557                                 /*109*/NULL,
2558                                        NULL,
2559                                        NULL,
2560                                        NULL,
2561                                        MatMissingDiagonal_MPIBAIJ,
2562                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2563                                        NULL,
2564                                        MatGetGhosts_MPIBAIJ,
2565                                        NULL,
2566                                        NULL,
2567                                 /*119*/NULL,
2568                                        NULL,
2569                                        NULL,
2570                                        NULL,
2571                                        MatGetMultiProcBlock_MPIBAIJ,
2572                                 /*124*/NULL,
2573                                        MatGetColumnNorms_MPIBAIJ,
2574                                        MatInvertBlockDiagonal_MPIBAIJ,
2575                                        NULL,
2576                                        NULL,
2577                                /*129*/ NULL,
2578                                        NULL,
2579                                        NULL,
2580                                        NULL,
2581                                        NULL,
2582                                /*134*/ NULL,
2583                                        NULL,
2584                                        NULL,
2585                                        NULL,
2586                                        NULL,
2587                                /*139*/ MatSetBlockSizes_Default,
2588                                        NULL,
2589                                        NULL,
2590                                        MatFDColoringSetUp_MPIXAIJ,
2591                                        NULL,
2592                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2593 };
2594 
2595 
2596 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2597 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2598 
MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])2599 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2600 {
2601   PetscInt       m,rstart,cstart,cend;
2602   PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2603   const PetscInt *JJ    =NULL;
2604   PetscScalar    *values=NULL;
2605   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2606   PetscErrorCode ierr;
2607   PetscBool      nooffprocentries;
2608 
2609   PetscFunctionBegin;
2610   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2611   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2612   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2613   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2614   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2615   m      = B->rmap->n/bs;
2616   rstart = B->rmap->rstart/bs;
2617   cstart = B->cmap->rstart/bs;
2618   cend   = B->cmap->rend/bs;
2619 
2620   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2621   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2622   for (i=0; i<m; i++) {
2623     nz = ii[i+1] - ii[i];
2624     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2625     nz_max = PetscMax(nz_max,nz);
2626     dlen   = 0;
2627     olen   = 0;
2628     JJ     = jj + ii[i];
2629     for (j=0; j<nz; j++) {
2630       if (*JJ < cstart || *JJ >= cend) olen++;
2631       else dlen++;
2632       JJ++;
2633     }
2634     d_nnz[i] = dlen;
2635     o_nnz[i] = olen;
2636   }
2637   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2638   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2639 
2640   values = (PetscScalar*)V;
2641   if (!values) {
2642     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2643   }
2644   for (i=0; i<m; i++) {
2645     PetscInt          row    = i + rstart;
2646     PetscInt          ncols  = ii[i+1] - ii[i];
2647     const PetscInt    *icols = jj + ii[i];
2648     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2649       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2650       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2651     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2652       PetscInt j;
2653       for (j=0; j<ncols; j++) {
2654         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2655         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2656       }
2657     }
2658   }
2659 
2660   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2661   nooffprocentries    = B->nooffprocentries;
2662   B->nooffprocentries = PETSC_TRUE;
2663   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2664   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2665   B->nooffprocentries = nooffprocentries;
2666 
2667   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2668   PetscFunctionReturn(0);
2669 }
2670 
2671 /*@C
2672    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2673 
2674    Collective
2675 
2676    Input Parameters:
2677 +  B - the matrix
2678 .  bs - the block size
2679 .  i - the indices into j for the start of each local row (starts with zero)
2680 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2681 -  v - optional values in the matrix
2682 
2683    Level: advanced
2684 
2685    Notes:
2686     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2687    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2688    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2689    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2690    block column and the second index is over columns within a block.
2691 
2692    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2693 
2694 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2695 @*/
MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])2696 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2697 {
2698   PetscErrorCode ierr;
2699 
2700   PetscFunctionBegin;
2701   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2702   PetscValidType(B,1);
2703   PetscValidLogicalCollectiveInt(B,bs,2);
2704   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2705   PetscFunctionReturn(0);
2706 }
2707 
MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt * d_nnz,PetscInt o_nz,const PetscInt * o_nnz)2708 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2709 {
2710   Mat_MPIBAIJ    *b;
2711   PetscErrorCode ierr;
2712   PetscInt       i;
2713   PetscMPIInt    size;
2714 
2715   PetscFunctionBegin;
2716   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2717   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2718   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2719   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2720 
2721   if (d_nnz) {
2722     for (i=0; i<B->rmap->n/bs; i++) {
2723       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2724     }
2725   }
2726   if (o_nnz) {
2727     for (i=0; i<B->rmap->n/bs; i++) {
2728       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2729     }
2730   }
2731 
2732   b      = (Mat_MPIBAIJ*)B->data;
2733   b->bs2 = bs*bs;
2734   b->mbs = B->rmap->n/bs;
2735   b->nbs = B->cmap->n/bs;
2736   b->Mbs = B->rmap->N/bs;
2737   b->Nbs = B->cmap->N/bs;
2738 
2739   for (i=0; i<=b->size; i++) {
2740     b->rangebs[i] = B->rmap->range[i]/bs;
2741   }
2742   b->rstartbs = B->rmap->rstart/bs;
2743   b->rendbs   = B->rmap->rend/bs;
2744   b->cstartbs = B->cmap->rstart/bs;
2745   b->cendbs   = B->cmap->rend/bs;
2746 
2747 #if defined(PETSC_USE_CTABLE)
2748   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2749 #else
2750   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2751 #endif
2752   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2753   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2754   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2755 
2756   /* Because the B will have been resized we simply destroy it and create a new one each time */
2757   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2758   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2759   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2760   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2761   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2762   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2763 
2764   if (!B->preallocated) {
2765     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2766     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2767     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2768     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2769     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2770   }
2771 
2772   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2773   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2774   B->preallocated  = PETSC_TRUE;
2775   B->was_assembled = PETSC_FALSE;
2776   B->assembled     = PETSC_FALSE;
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2781 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2782 
MatConvert_MPIBAIJ_MPIAdj(Mat B,MatType newtype,MatReuse reuse,Mat * adj)2783 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2784 {
2785   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2786   PetscErrorCode ierr;
2787   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2788   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2789   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2790 
2791   PetscFunctionBegin;
2792   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2793   ii[0] = 0;
2794   for (i=0; i<M; i++) {
2795     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2796     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2797     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2798     /* remove one from count of matrix has diagonal */
2799     for (j=id[i]; j<id[i+1]; j++) {
2800       if (jd[j] == i) {ii[i+1]--;break;}
2801     }
2802   }
2803   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2804   cnt  = 0;
2805   for (i=0; i<M; i++) {
2806     for (j=io[i]; j<io[i+1]; j++) {
2807       if (garray[jo[j]] > rstart) break;
2808       jj[cnt++] = garray[jo[j]];
2809     }
2810     for (k=id[i]; k<id[i+1]; k++) {
2811       if (jd[k] != i) {
2812         jj[cnt++] = rstart + jd[k];
2813       }
2814     }
2815     for (; j<io[i+1]; j++) {
2816       jj[cnt++] = garray[jo[j]];
2817     }
2818   }
2819   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2824 
2825 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2826 
MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)2827 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2828 {
2829   PetscErrorCode ierr;
2830   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2831   Mat            B;
2832   Mat_MPIAIJ     *b;
2833 
2834   PetscFunctionBegin;
2835   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2836 
2837   if (reuse == MAT_REUSE_MATRIX) {
2838     B = *newmat;
2839   } else {
2840     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2841     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2842     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2843     ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2844     ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2845     ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2846   }
2847   b = (Mat_MPIAIJ*) B->data;
2848 
2849   if (reuse == MAT_REUSE_MATRIX) {
2850     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
2851     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
2852   } else {
2853     ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2854     ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2855     ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2856     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2857     ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2858     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2859     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2860   }
2861   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2863 
2864   if (reuse == MAT_INPLACE_MATRIX) {
2865     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
2866   } else {
2867    *newmat = B;
2868   }
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 /*MC
2873    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2874 
2875    Options Database Keys:
2876 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2877 . -mat_block_size <bs> - set the blocksize used to store the matrix
2878 - -mat_use_hash_table <fact>
2879 
2880    Level: beginner
2881 
2882    Notes:
2883     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
2884     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2885 
2886 .seealso: MatCreateBAIJ
2887 M*/
2888 
2889 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2890 
MatCreate_MPIBAIJ(Mat B)2891 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2892 {
2893   Mat_MPIBAIJ    *b;
2894   PetscErrorCode ierr;
2895   PetscBool      flg = PETSC_FALSE;
2896 
2897   PetscFunctionBegin;
2898   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2899   B->data = (void*)b;
2900 
2901   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2902   B->assembled = PETSC_FALSE;
2903 
2904   B->insertmode = NOT_SET_VALUES;
2905   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2906   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2907 
2908   /* build local table of row and column ownerships */
2909   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2910 
2911   /* build cache for off array entries formed */
2912   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2913 
2914   b->donotstash  = PETSC_FALSE;
2915   b->colmap      = NULL;
2916   b->garray      = NULL;
2917   b->roworiented = PETSC_TRUE;
2918 
2919   /* stuff used in block assembly */
2920   b->barray = NULL;
2921 
2922   /* stuff used for matrix vector multiply */
2923   b->lvec  = NULL;
2924   b->Mvctx = NULL;
2925 
2926   /* stuff for MatGetRow() */
2927   b->rowindices   = NULL;
2928   b->rowvalues    = NULL;
2929   b->getrowactive = PETSC_FALSE;
2930 
2931   /* hash table stuff */
2932   b->ht           = NULL;
2933   b->hd           = NULL;
2934   b->ht_size      = 0;
2935   b->ht_flag      = PETSC_FALSE;
2936   b->ht_fact      = 0;
2937   b->ht_total_ct  = 0;
2938   b->ht_insert_ct = 0;
2939 
2940   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2941   b->ijonly = PETSC_FALSE;
2942 
2943 
2944   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2945   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2946   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
2947 #if defined(PETSC_HAVE_HYPRE)
2948   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
2949 #endif
2950   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2951   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2952   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2953   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2954   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2955   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2956   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
2957   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2958 
2959   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2960   ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
2961   if (flg) {
2962     PetscReal fact = 1.39;
2963     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2964     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2965     if (fact <= 1.0) fact = 1.39;
2966     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2967     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2968   }
2969   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 /*MC
2974    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2975 
2976    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2977    and MATMPIBAIJ otherwise.
2978 
2979    Options Database Keys:
2980 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2981 
2982   Level: beginner
2983 
2984 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2985 M*/
2986 
2987 /*@C
2988    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2989    (block compressed row).  For good matrix assembly performance
2990    the user should preallocate the matrix storage by setting the parameters
2991    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2992    performance can be increased by more than a factor of 50.
2993 
2994    Collective on Mat
2995 
2996    Input Parameters:
2997 +  B - the matrix
2998 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2999           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3000 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3001            submatrix  (same for all local rows)
3002 .  d_nnz - array containing the number of block nonzeros in the various block rows
3003            of the in diagonal portion of the local (possibly different for each block
3004            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3005            set it even if it is zero.
3006 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3007            submatrix (same for all local rows).
3008 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3009            off-diagonal portion of the local submatrix (possibly different for
3010            each block row) or NULL.
3011 
3012    If the *_nnz parameter is given then the *_nz parameter is ignored
3013 
3014    Options Database Keys:
3015 +   -mat_block_size - size of the blocks to use
3016 -   -mat_use_hash_table <fact>
3017 
3018    Notes:
3019    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3020    than it must be used on all processors that share the object for that argument.
3021 
3022    Storage Information:
3023    For a square global matrix we define each processor's diagonal portion
3024    to be its local rows and the corresponding columns (a square submatrix);
3025    each processor's off-diagonal portion encompasses the remainder of the
3026    local matrix (a rectangular submatrix).
3027 
3028    The user can specify preallocated storage for the diagonal part of
3029    the local submatrix with either d_nz or d_nnz (not both).  Set
3030    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3031    memory allocation.  Likewise, specify preallocated storage for the
3032    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3033 
3034    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3035    the figure below we depict these three local rows and all columns (0-11).
3036 
3037 .vb
3038            0 1 2 3 4 5 6 7 8 9 10 11
3039           --------------------------
3040    row 3  |o o o d d d o o o o  o  o
3041    row 4  |o o o d d d o o o o  o  o
3042    row 5  |o o o d d d o o o o  o  o
3043           --------------------------
3044 .ve
3045 
3046    Thus, any entries in the d locations are stored in the d (diagonal)
3047    submatrix, and any entries in the o locations are stored in the
3048    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3049    stored simply in the MATSEQBAIJ format for compressed row storage.
3050 
3051    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3052    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3053    In general, for PDE problems in which most nonzeros are near the diagonal,
3054    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3055    or you will get TERRIBLE performance; see the users' manual chapter on
3056    matrices.
3057 
3058    You can call MatGetInfo() to get information on how effective the preallocation was;
3059    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3060    You can also run with the option -info and look for messages with the string
3061    malloc in them to see if additional memory allocation was needed.
3062 
3063    Level: intermediate
3064 
3065 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3066 @*/
MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])3067 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3068 {
3069   PetscErrorCode ierr;
3070 
3071   PetscFunctionBegin;
3072   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3073   PetscValidType(B,1);
3074   PetscValidLogicalCollectiveInt(B,bs,2);
3075   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 /*@C
3080    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3081    (block compressed row).  For good matrix assembly performance
3082    the user should preallocate the matrix storage by setting the parameters
3083    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3084    performance can be increased by more than a factor of 50.
3085 
3086    Collective
3087 
3088    Input Parameters:
3089 +  comm - MPI communicator
3090 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3091           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3092 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3093            This value should be the same as the local size used in creating the
3094            y vector for the matrix-vector product y = Ax.
3095 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3096            This value should be the same as the local size used in creating the
3097            x vector for the matrix-vector product y = Ax.
3098 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3099 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3100 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3101            submatrix  (same for all local rows)
3102 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3103            of the in diagonal portion of the local (possibly different for each block
3104            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3105            and set it even if it is zero.
3106 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3107            submatrix (same for all local rows).
3108 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3109            off-diagonal portion of the local submatrix (possibly different for
3110            each block row) or NULL.
3111 
3112    Output Parameter:
3113 .  A - the matrix
3114 
3115    Options Database Keys:
3116 +   -mat_block_size - size of the blocks to use
3117 -   -mat_use_hash_table <fact>
3118 
3119    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3120    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3121    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3122 
3123    Notes:
3124    If the *_nnz parameter is given then the *_nz parameter is ignored
3125 
3126    A nonzero block is any block that as 1 or more nonzeros in it
3127 
3128    The user MUST specify either the local or global matrix dimensions
3129    (possibly both).
3130 
3131    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3132    than it must be used on all processors that share the object for that argument.
3133 
3134    Storage Information:
3135    For a square global matrix we define each processor's diagonal portion
3136    to be its local rows and the corresponding columns (a square submatrix);
3137    each processor's off-diagonal portion encompasses the remainder of the
3138    local matrix (a rectangular submatrix).
3139 
3140    The user can specify preallocated storage for the diagonal part of
3141    the local submatrix with either d_nz or d_nnz (not both).  Set
3142    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3143    memory allocation.  Likewise, specify preallocated storage for the
3144    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3145 
3146    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3147    the figure below we depict these three local rows and all columns (0-11).
3148 
3149 .vb
3150            0 1 2 3 4 5 6 7 8 9 10 11
3151           --------------------------
3152    row 3  |o o o d d d o o o o  o  o
3153    row 4  |o o o d d d o o o o  o  o
3154    row 5  |o o o d d d o o o o  o  o
3155           --------------------------
3156 .ve
3157 
3158    Thus, any entries in the d locations are stored in the d (diagonal)
3159    submatrix, and any entries in the o locations are stored in the
3160    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3161    stored simply in the MATSEQBAIJ format for compressed row storage.
3162 
3163    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3164    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3165    In general, for PDE problems in which most nonzeros are near the diagonal,
3166    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3167    or you will get TERRIBLE performance; see the users' manual chapter on
3168    matrices.
3169 
3170    Level: intermediate
3171 
3172 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3173 @*/
MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat * A)3174 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3175 {
3176   PetscErrorCode ierr;
3177   PetscMPIInt    size;
3178 
3179   PetscFunctionBegin;
3180   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3181   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3182   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3183   if (size > 1) {
3184     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3185     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3186   } else {
3187     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3188     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3189   }
3190   PetscFunctionReturn(0);
3191 }
3192 
MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat * newmat)3193 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3194 {
3195   Mat            mat;
3196   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3197   PetscErrorCode ierr;
3198   PetscInt       len=0;
3199 
3200   PetscFunctionBegin;
3201   *newmat = NULL;
3202   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3203   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3204   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3205 
3206   mat->factortype   = matin->factortype;
3207   mat->preallocated = PETSC_TRUE;
3208   mat->assembled    = PETSC_TRUE;
3209   mat->insertmode   = NOT_SET_VALUES;
3210 
3211   a             = (Mat_MPIBAIJ*)mat->data;
3212   mat->rmap->bs = matin->rmap->bs;
3213   a->bs2        = oldmat->bs2;
3214   a->mbs        = oldmat->mbs;
3215   a->nbs        = oldmat->nbs;
3216   a->Mbs        = oldmat->Mbs;
3217   a->Nbs        = oldmat->Nbs;
3218 
3219   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3220   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3221 
3222   a->size         = oldmat->size;
3223   a->rank         = oldmat->rank;
3224   a->donotstash   = oldmat->donotstash;
3225   a->roworiented  = oldmat->roworiented;
3226   a->rowindices   = NULL;
3227   a->rowvalues    = NULL;
3228   a->getrowactive = PETSC_FALSE;
3229   a->barray       = NULL;
3230   a->rstartbs     = oldmat->rstartbs;
3231   a->rendbs       = oldmat->rendbs;
3232   a->cstartbs     = oldmat->cstartbs;
3233   a->cendbs       = oldmat->cendbs;
3234 
3235   /* hash table stuff */
3236   a->ht           = NULL;
3237   a->hd           = NULL;
3238   a->ht_size      = 0;
3239   a->ht_flag      = oldmat->ht_flag;
3240   a->ht_fact      = oldmat->ht_fact;
3241   a->ht_total_ct  = 0;
3242   a->ht_insert_ct = 0;
3243 
3244   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
3245   if (oldmat->colmap) {
3246 #if defined(PETSC_USE_CTABLE)
3247     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3248 #else
3249     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3250     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3251     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
3252 #endif
3253   } else a->colmap = NULL;
3254 
3255   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3256     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3257     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3258     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
3259   } else a->garray = NULL;
3260 
3261   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3262   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3263   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3264   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3265   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3266 
3267   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3268   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3269   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3270   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3271   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3272   *newmat = mat;
3273   PetscFunctionReturn(0);
3274 }
3275 
3276 /* Used for both MPIBAIJ and MPISBAIJ matrices */
MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)3277 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3278 {
3279   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3280   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3281   PetscScalar    *matvals;
3282   PetscErrorCode ierr;
3283 
3284   PetscFunctionBegin;
3285   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3286 
3287   /* read in matrix header */
3288   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3289   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3290   M  = header[1]; N = header[2]; nz = header[3];
3291   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3292   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3293   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3294 
3295   /* set block sizes from the viewer's .info file */
3296   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3297   /* set local sizes if not set already */
3298   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3299   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3300   /* set global sizes if not set already */
3301   if (mat->rmap->N < 0) mat->rmap->N = M;
3302   if (mat->cmap->N < 0) mat->cmap->N = N;
3303   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3304   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3305 
3306   /* check if the matrix sizes are correct */
3307   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3308   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3309   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3310   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3311   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);
3312   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);
3313   mbs = m/bs; nbs = n/bs;
3314 
3315   /* read in row lengths and build row indices */
3316   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3317   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3318   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3319   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3320   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3321 
3322   /* read in column indices and matrix values */
3323   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3324   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3325   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3326 
3327   { /* preallocate matrix storage */
3328     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3329     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3330     PetscBool  sbaij,done;
3331     PetscInt   *d_nnz,*o_nnz;
3332 
3333     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3334     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3335     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3336     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3337     for (i=0; i<mbs; i++) {
3338       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3339       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3340       for (k=0; k<bs; k++) {
3341         PetscInt row = bs*i + k;
3342         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3343           PetscInt col = colidxs[j];
3344           if (!sbaij || col >= row) {
3345             if (col >= cs && col < ce) {
3346               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3347             } else {
3348               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3349               if (done) o_nnz[i]++;
3350             }
3351           }
3352         }
3353       }
3354     }
3355     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3356     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3357     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3358     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3359     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3360   }
3361 
3362   /* store matrix values */
3363   for (i=0; i<m; i++) {
3364     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3365     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3366   }
3367 
3368   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3369   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3370   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3371   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3372   PetscFunctionReturn(0);
3373 }
3374 
MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)3375 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3376 {
3377   PetscErrorCode ierr;
3378   PetscBool      isbinary;
3379 
3380   PetscFunctionBegin;
3381   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3382   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3383   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 
3387 /*@
3388    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3389 
3390    Input Parameters:
3391 +  mat  - the matrix
3392 -  fact - factor
3393 
3394    Not Collective, each process can use a different factor
3395 
3396    Level: advanced
3397 
3398   Notes:
3399    This can also be set by the command line option: -mat_use_hash_table <fact>
3400 
3401 .seealso: MatSetOption()
3402 @*/
MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)3403 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3404 {
3405   PetscErrorCode ierr;
3406 
3407   PetscFunctionBegin;
3408   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3409   PetscFunctionReturn(0);
3410 }
3411 
MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)3412 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3413 {
3414   Mat_MPIBAIJ *baij;
3415 
3416   PetscFunctionBegin;
3417   baij          = (Mat_MPIBAIJ*)mat->data;
3418   baij->ht_fact = fact;
3419   PetscFunctionReturn(0);
3420 }
3421 
MatMPIBAIJGetSeqBAIJ(Mat A,Mat * Ad,Mat * Ao,const PetscInt * colmap[])3422 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3423 {
3424   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3425   PetscBool      flg;
3426   PetscErrorCode ierr;
3427 
3428   PetscFunctionBegin;
3429   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3430   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3431   if (Ad)     *Ad     = a->A;
3432   if (Ao)     *Ao     = a->B;
3433   if (colmap) *colmap = a->garray;
3434   PetscFunctionReturn(0);
3435 }
3436 
3437 /*
3438     Special version for direct calls from Fortran (to eliminate two function call overheads
3439 */
3440 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3441 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3442 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3443 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3444 #endif
3445 
3446 /*@C
3447   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3448 
3449   Collective on Mat
3450 
3451   Input Parameters:
3452 + mat - the matrix
3453 . min - number of input rows
3454 . im - input rows
3455 . nin - number of input columns
3456 . in - input columns
3457 . v - numerical values input
3458 - addvin - INSERT_VALUES or ADD_VALUES
3459 
3460   Notes:
3461     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3462 
3463   Level: advanced
3464 
3465 .seealso:   MatSetValuesBlocked()
3466 @*/
matmpibaijsetvaluesblocked_(Mat * matin,PetscInt * min,const PetscInt im[],PetscInt * nin,const PetscInt in[],const MatScalar v[],InsertMode * addvin)3467 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3468 {
3469   /* convert input arguments to C version */
3470   Mat        mat  = *matin;
3471   PetscInt   m    = *min, n = *nin;
3472   InsertMode addv = *addvin;
3473 
3474   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3475   const MatScalar *value;
3476   MatScalar       *barray     = baij->barray;
3477   PetscBool       roworiented = baij->roworiented;
3478   PetscErrorCode  ierr;
3479   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3480   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3481   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3482 
3483   PetscFunctionBegin;
3484   /* tasks normally handled by MatSetValuesBlocked() */
3485   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3486   else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3487   if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3488   if (mat->assembled) {
3489     mat->was_assembled = PETSC_TRUE;
3490     mat->assembled     = PETSC_FALSE;
3491   }
3492   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3493 
3494 
3495   if (!barray) {
3496     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3497     baij->barray = barray;
3498   }
3499 
3500   if (roworiented) stepval = (n-1)*bs;
3501   else stepval = (m-1)*bs;
3502 
3503   for (i=0; i<m; i++) {
3504     if (im[i] < 0) continue;
3505     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3506     if (im[i] >= rstart && im[i] < rend) {
3507       row = im[i] - rstart;
3508       for (j=0; j<n; j++) {
3509         /* If NumCol = 1 then a copy is not required */
3510         if ((roworiented) && (n == 1)) {
3511           barray = (MatScalar*)v + i*bs2;
3512         } else if ((!roworiented) && (m == 1)) {
3513           barray = (MatScalar*)v + j*bs2;
3514         } else { /* Here a copy is required */
3515           if (roworiented) {
3516             value = v + i*(stepval+bs)*bs + j*bs;
3517           } else {
3518             value = v + j*(stepval+bs)*bs + i*bs;
3519           }
3520           for (ii=0; ii<bs; ii++,value+=stepval) {
3521             for (jj=0; jj<bs; jj++) {
3522               *barray++ = *value++;
3523             }
3524           }
3525           barray -=bs2;
3526         }
3527 
3528         if (in[j] >= cstart && in[j] < cend) {
3529           col  = in[j] - cstart;
3530           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3531         } else if (in[j] < 0) continue;
3532         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3533         else {
3534           if (mat->was_assembled) {
3535             if (!baij->colmap) {
3536               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3537             }
3538 
3539 #if defined(PETSC_USE_DEBUG)
3540 #if defined(PETSC_USE_CTABLE)
3541             { PetscInt data;
3542               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3543               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3544             }
3545 #else
3546             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3547 #endif
3548 #endif
3549 #if defined(PETSC_USE_CTABLE)
3550             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3551             col  = (col - 1)/bs;
3552 #else
3553             col = (baij->colmap[in[j]] - 1)/bs;
3554 #endif
3555             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3556               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3557               col  =  in[j];
3558             }
3559           } else col = in[j];
3560           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
3561         }
3562       }
3563     } else {
3564       if (!baij->donotstash) {
3565         if (roworiented) {
3566           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3567         } else {
3568           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3569         }
3570       }
3571     }
3572   }
3573 
3574   /* task normally handled by MatSetValuesBlocked() */
3575   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3576   PetscFunctionReturn(0);
3577 }
3578 
3579 /*@
3580      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3581          CSR format the local rows.
3582 
3583    Collective
3584 
3585    Input Parameters:
3586 +  comm - MPI communicator
3587 .  bs - the block size, only a block size of 1 is supported
3588 .  m - number of local rows (Cannot be PETSC_DECIDE)
3589 .  n - This value should be the same as the local size used in creating the
3590        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3591        calculated if N is given) For square matrices n is almost always m.
3592 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3593 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3594 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3595 .   j - column indices
3596 -   a - matrix values
3597 
3598    Output Parameter:
3599 .   mat - the matrix
3600 
3601    Level: intermediate
3602 
3603    Notes:
3604        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3605      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3606      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3607 
3608      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3609      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3610      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3611      with column-major ordering within blocks.
3612 
3613        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3614 
3615 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3616           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3617 @*/
MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat * mat)3618 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3619 {
3620   PetscErrorCode ierr;
3621 
3622   PetscFunctionBegin;
3623   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3624   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3625   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3626   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3627   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3628   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3629   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3630   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3631   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3632   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3633   PetscFunctionReturn(0);
3634 }
3635 
MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)3636 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3637 {
3638   PetscErrorCode ierr;
3639   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3640   PetscInt       *indx;
3641   PetscScalar    *values;
3642 
3643   PetscFunctionBegin;
3644   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3645   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3646     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3647     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3648     PetscInt       *bindx,rmax=a->rmax,j;
3649     PetscMPIInt    rank,size;
3650 
3651     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3652     mbs = m/bs; Nbs = N/cbs;
3653     if (n == PETSC_DECIDE) {
3654       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3655     }
3656     nbs = n/cbs;
3657 
3658     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3659     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3660 
3661     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3662     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3663     if (rank == size-1) {
3664       /* Check sum(nbs) = Nbs */
3665       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3666     }
3667 
3668     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3669     for (i=0; i<mbs; i++) {
3670       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3671       nnz = nnz/bs;
3672       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3673       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3674       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3675     }
3676     ierr = PetscFree(bindx);CHKERRQ(ierr);
3677 
3678     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3679     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3680     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3681     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3682     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3683     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3684     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3685   }
3686 
3687   /* numeric phase */
3688   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3689   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3690 
3691   for (i=0; i<m; i++) {
3692     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3693     Ii   = i + rstart;
3694     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3695     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3696   }
3697   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3698   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3699   PetscFunctionReturn(0);
3700 }
3701