1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
MatPreallocateWithMats_Private(Mat B,PetscInt nm,Mat X[],PetscBool symm[],PetscBool fill)15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21   PetscErrorCode ierr;
22 
23   PetscFunctionBegin;
24   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
25   PetscValidLogicalCollectiveInt(B,nm,2);
26   for (i = 0; i < nm; i++) {
27     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
28     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
29     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
30   }
31   PetscValidLogicalCollectiveBool(B,fill,5);
32   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
33   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
34   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
35   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
36   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
37   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
38   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
39   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
40   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
41   for (r = rstart; r < rend; ++r) {
42     PetscInt          ncols;
43     const PetscInt    *row;
44     const PetscScalar *vals;
45 
46     for (i = 0; i < nm; i++) {
47       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
48       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
49       if (symm && symm[i]) {
50         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
51       }
52       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
53     }
54   }
55   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
56   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
57   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
58   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
59   PetscFunctionReturn(0);
60 }
61 
MatConvert_MPISBAIJ_Basic(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)62 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
63 {
64   Mat            B;
65   PetscErrorCode ierr;
66   PetscInt       r;
67 
68   PetscFunctionBegin;
69   if (reuse != MAT_REUSE_MATRIX) {
70     PetscBool symm = PETSC_TRUE,isdense;
71     PetscInt  bs;
72 
73     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
74     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
75     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
76     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
77     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
78     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
79     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
80     ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
81     if (!isdense) {
82       ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
83       ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
84       ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
85     } else {
86       ierr = MatSetUp(B);CHKERRQ(ierr);
87     }
88   } else {
89     B    = *newmat;
90     ierr = MatZeroEntries(B);CHKERRQ(ierr);
91   }
92 
93   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
94   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
95     PetscInt          ncols;
96     const PetscInt    *row;
97     const PetscScalar *vals;
98 
99     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
100     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
101 #if defined(PETSC_USE_COMPLEX)
102     if (A->hermitian) {
103       PetscInt i;
104       for (i = 0; i < ncols; i++) {
105         ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
106       }
107     } else {
108       ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
109     }
110 #else
111     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
112 #endif
113     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
114   }
115   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
116   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
117   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
118 
119   if (reuse == MAT_INPLACE_MATRIX) {
120     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
121   } else {
122     *newmat = B;
123   }
124   PetscFunctionReturn(0);
125 }
126 
MatStoreValues_MPISBAIJ(Mat mat)127 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
128 {
129   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
134   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
MatRetrieveValues_MPISBAIJ(Mat mat)138 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
139 {
140   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
141   PetscErrorCode ierr;
142 
143   PetscFunctionBegin;
144   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
145   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
150   { \
151     brow = row/bs;  \
152     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
153     rmax = aimax[brow]; nrow = ailen[brow]; \
154     bcol = col/bs; \
155     ridx = row % bs; cidx = col % bs; \
156     low  = 0; high = nrow; \
157     while (high-low > 3) { \
158       t = (low+high)/2; \
159       if (rp[t] > bcol) high = t; \
160       else              low  = t; \
161     } \
162     for (_i=low; _i<high; _i++) { \
163       if (rp[_i] > bcol) break; \
164       if (rp[_i] == bcol) { \
165         bap = ap + bs2*_i + bs*cidx + ridx; \
166         if (addv == ADD_VALUES) *bap += value;  \
167         else                    *bap  = value;  \
168         goto a_noinsert; \
169       } \
170     } \
171     if (a->nonew == 1) goto a_noinsert; \
172     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
173     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
174     N = nrow++ - 1;  \
175     /* shift up all the later entries in this row */ \
176     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
177     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
178     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
179     rp[_i]                      = bcol;  \
180     ap[bs2*_i + bs*cidx + ridx] = value;  \
181     A->nonzerostate++;\
182 a_noinsert:; \
183     ailen[brow] = nrow; \
184   }
185 
186 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
187   { \
188     brow = row/bs;  \
189     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
190     rmax = bimax[brow]; nrow = bilen[brow]; \
191     bcol = col/bs; \
192     ridx = row % bs; cidx = col % bs; \
193     low  = 0; high = nrow; \
194     while (high-low > 3) { \
195       t = (low+high)/2; \
196       if (rp[t] > bcol) high = t; \
197       else              low  = t; \
198     } \
199     for (_i=low; _i<high; _i++) { \
200       if (rp[_i] > bcol) break; \
201       if (rp[_i] == bcol) { \
202         bap = ap + bs2*_i + bs*cidx + ridx; \
203         if (addv == ADD_VALUES) *bap += value;  \
204         else                    *bap  = value;  \
205         goto b_noinsert; \
206       } \
207     } \
208     if (b->nonew == 1) goto b_noinsert; \
209     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
210     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
211     N = nrow++ - 1;  \
212     /* shift up all the later entries in this row */ \
213     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
214     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
215     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
216     rp[_i]                      = bcol;  \
217     ap[bs2*_i + bs*cidx + ridx] = value;  \
218     B->nonzerostate++;\
219 b_noinsert:; \
220     bilen[brow] = nrow; \
221   }
222 
223 /* Only add/insert a(i,j) with i<=j (blocks).
224    Any a(i,j) with i>j input by user is ingored or generates an error
225 */
MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)226 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
227 {
228   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
229   MatScalar      value;
230   PetscBool      roworiented = baij->roworiented;
231   PetscErrorCode ierr;
232   PetscInt       i,j,row,col;
233   PetscInt       rstart_orig=mat->rmap->rstart;
234   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
235   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
236 
237   /* Some Variables required in the macro */
238   Mat          A     = baij->A;
239   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
240   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
241   MatScalar    *aa   =a->a;
242 
243   Mat         B     = baij->B;
244   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
245   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
246   MatScalar   *ba   =b->a;
247 
248   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
249   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
250   MatScalar *ap,*bap;
251 
252   /* for stash */
253   PetscInt  n_loc, *in_loc = NULL;
254   MatScalar *v_loc = NULL;
255 
256   PetscFunctionBegin;
257   if (!baij->donotstash) {
258     if (n > baij->n_loc) {
259       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
260       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
261       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
262       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
263 
264       baij->n_loc = n;
265     }
266     in_loc = baij->in_loc;
267     v_loc  = baij->v_loc;
268   }
269 
270   for (i=0; i<m; i++) {
271     if (im[i] < 0) continue;
272     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
273     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
274       row = im[i] - rstart_orig;              /* local row index */
275       for (j=0; j<n; j++) {
276         if (im[i]/bs > in[j]/bs) {
277           if (a->ignore_ltriangular) {
278             continue;    /* ignore lower triangular blocks */
279           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
280         }
281         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
282           col  = in[j] - cstart_orig;         /* local col index */
283           brow = row/bs; bcol = col/bs;
284           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
285           if (roworiented) value = v[i*n+j];
286           else             value = v[i+j*m];
287           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
288           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
289         } else if (in[j] < 0) continue;
290         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
291         else {  /* off-diag entry (B) */
292           if (mat->was_assembled) {
293             if (!baij->colmap) {
294               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
295             }
296 #if defined(PETSC_USE_CTABLE)
297             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
298             col  = col - 1;
299 #else
300             col = baij->colmap[in[j]/bs] - 1;
301 #endif
302             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
303               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
304               col  =  in[j];
305               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
306               B    = baij->B;
307               b    = (Mat_SeqBAIJ*)(B)->data;
308               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
309               ba   = b->a;
310             } else col += in[j]%bs;
311           } else col = in[j];
312           if (roworiented) value = v[i*n+j];
313           else             value = v[i+j*m];
314           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
315           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
316         }
317       }
318     } else {  /* off processor entry */
319       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
320       if (!baij->donotstash) {
321         mat->assembled = PETSC_FALSE;
322         n_loc          = 0;
323         for (j=0; j<n; j++) {
324           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
325           in_loc[n_loc] = in[j];
326           if (roworiented) {
327             v_loc[n_loc] = v[i*n+j];
328           } else {
329             v_loc[n_loc] = v[j*m+i];
330           }
331           n_loc++;
332         }
333         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
334       }
335     }
336   }
337   PetscFunctionReturn(0);
338 }
339 
MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)340 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
341 {
342   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
343   PetscErrorCode    ierr;
344   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
345   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
346   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
347   PetscBool         roworiented=a->roworiented;
348   const PetscScalar *value     = v;
349   MatScalar         *ap,*aa = a->a,*bap;
350 
351   PetscFunctionBegin;
352   if (col < row) {
353     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
354     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
355   }
356   rp   = aj + ai[row];
357   ap   = aa + bs2*ai[row];
358   rmax = imax[row];
359   nrow = ailen[row];
360   value = v;
361   low   = 0;
362   high  = nrow;
363 
364   while (high-low > 7) {
365     t = (low+high)/2;
366     if (rp[t] > col) high = t;
367     else             low  = t;
368   }
369   for (i=low; i<high; i++) {
370     if (rp[i] > col) break;
371     if (rp[i] == col) {
372       bap = ap +  bs2*i;
373       if (roworiented) {
374         if (is == ADD_VALUES) {
375           for (ii=0; ii<bs; ii++) {
376             for (jj=ii; jj<bs2; jj+=bs) {
377               bap[jj] += *value++;
378             }
379           }
380         } else {
381           for (ii=0; ii<bs; ii++) {
382             for (jj=ii; jj<bs2; jj+=bs) {
383               bap[jj] = *value++;
384             }
385           }
386         }
387       } else {
388         if (is == ADD_VALUES) {
389           for (ii=0; ii<bs; ii++) {
390             for (jj=0; jj<bs; jj++) {
391               *bap++ += *value++;
392             }
393           }
394         } else {
395           for (ii=0; ii<bs; ii++) {
396             for (jj=0; jj<bs; jj++) {
397               *bap++  = *value++;
398             }
399           }
400         }
401       }
402       goto noinsert2;
403     }
404   }
405   if (nonew == 1) goto noinsert2;
406   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
407   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
408   N = nrow++ - 1; high++;
409   /* shift up all the later entries in this row */
410   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
411   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
412   rp[i] = col;
413   bap   = ap +  bs2*i;
414   if (roworiented) {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=ii; jj<bs2; jj+=bs) {
417         bap[jj] = *value++;
418       }
419     }
420   } else {
421     for (ii=0; ii<bs; ii++) {
422       for (jj=0; jj<bs; jj++) {
423         *bap++ = *value++;
424       }
425     }
426   }
427   noinsert2:;
428   ailen[row] = nrow;
429   PetscFunctionReturn(0);
430 }
431 
432 /*
433    This routine is exactly duplicated in mpibaij.c
434 */
MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)435 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
436 {
437   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
438   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
439   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
440   PetscErrorCode    ierr;
441   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
442   PetscBool         roworiented=a->roworiented;
443   const PetscScalar *value     = v;
444   MatScalar         *ap,*aa = a->a,*bap;
445 
446   PetscFunctionBegin;
447   rp   = aj + ai[row];
448   ap   = aa + bs2*ai[row];
449   rmax = imax[row];
450   nrow = ailen[row];
451   low  = 0;
452   high = nrow;
453   value = v;
454   while (high-low > 7) {
455     t = (low+high)/2;
456     if (rp[t] > col) high = t;
457     else             low  = t;
458   }
459   for (i=low; i<high; i++) {
460     if (rp[i] > col) break;
461     if (rp[i] == col) {
462       bap = ap +  bs2*i;
463       if (roworiented) {
464         if (is == ADD_VALUES) {
465           for (ii=0; ii<bs; ii++) {
466             for (jj=ii; jj<bs2; jj+=bs) {
467               bap[jj] += *value++;
468             }
469           }
470         } else {
471           for (ii=0; ii<bs; ii++) {
472             for (jj=ii; jj<bs2; jj+=bs) {
473               bap[jj] = *value++;
474             }
475           }
476         }
477       } else {
478         if (is == ADD_VALUES) {
479           for (ii=0; ii<bs; ii++,value+=bs) {
480             for (jj=0; jj<bs; jj++) {
481               bap[jj] += value[jj];
482             }
483             bap += bs;
484           }
485         } else {
486           for (ii=0; ii<bs; ii++,value+=bs) {
487             for (jj=0; jj<bs; jj++) {
488               bap[jj]  = value[jj];
489             }
490             bap += bs;
491           }
492         }
493       }
494       goto noinsert2;
495     }
496   }
497   if (nonew == 1) goto noinsert2;
498   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
499   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
500   N = nrow++ - 1; high++;
501   /* shift up all the later entries in this row */
502   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
503   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
504   rp[i] = col;
505   bap   = ap +  bs2*i;
506   if (roworiented) {
507     for (ii=0; ii<bs; ii++) {
508       for (jj=ii; jj<bs2; jj+=bs) {
509         bap[jj] = *value++;
510       }
511     }
512   } else {
513     for (ii=0; ii<bs; ii++) {
514       for (jj=0; jj<bs; jj++) {
515         *bap++ = *value++;
516       }
517     }
518   }
519   noinsert2:;
520   ailen[row] = nrow;
521   PetscFunctionReturn(0);
522 }
523 
524 /*
525     This routine could be optimized by removing the need for the block copy below and passing stride information
526   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
527 */
MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)528 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
529 {
530   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
531   const MatScalar *value;
532   MatScalar       *barray     =baij->barray;
533   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
534   PetscErrorCode  ierr;
535   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
536   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
537   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
538 
539   PetscFunctionBegin;
540   if (!barray) {
541     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
542     baij->barray = barray;
543   }
544 
545   if (roworiented) {
546     stepval = (n-1)*bs;
547   } else {
548     stepval = (m-1)*bs;
549   }
550   for (i=0; i<m; i++) {
551     if (im[i] < 0) continue;
552     if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
553     if (im[i] >= rstart && im[i] < rend) {
554       row = im[i] - rstart;
555       for (j=0; j<n; j++) {
556         if (im[i] > in[j]) {
557           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
558           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
559         }
560         /* If NumCol = 1 then a copy is not required */
561         if ((roworiented) && (n == 1)) {
562           barray = (MatScalar*) v + i*bs2;
563         } else if ((!roworiented) && (m == 1)) {
564           barray = (MatScalar*) v + j*bs2;
565         } else { /* Here a copy is required */
566           if (roworiented) {
567             value = v + i*(stepval+bs)*bs + j*bs;
568           } else {
569             value = v + j*(stepval+bs)*bs + i*bs;
570           }
571           for (ii=0; ii<bs; ii++,value+=stepval) {
572             for (jj=0; jj<bs; jj++) {
573               *barray++ = *value++;
574             }
575           }
576           barray -=bs2;
577         }
578 
579         if (in[j] >= cstart && in[j] < cend) {
580           col  = in[j] - cstart;
581           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
582         } else if (in[j] < 0) continue;
583         else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
584         else {
585           if (mat->was_assembled) {
586             if (!baij->colmap) {
587               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
588             }
589 
590 #if defined(PETSC_USE_DEBUG)
591 #if defined(PETSC_USE_CTABLE)
592             { PetscInt data;
593               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
594               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
595             }
596 #else
597             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
598 #endif
599 #endif
600 #if defined(PETSC_USE_CTABLE)
601             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
602             col  = (col - 1)/bs;
603 #else
604             col = (baij->colmap[in[j]] - 1)/bs;
605 #endif
606             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
607               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
608               col  = in[j];
609             }
610           } else col = in[j];
611           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
612         }
613       }
614     } else {
615       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
616       if (!baij->donotstash) {
617         if (roworiented) {
618           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])628 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
633   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
640       row = idxm[i] - bsrstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= bscstart && idxn[j] < bscend) {
645           col  = idxn[j] - bscstart;
646           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!baij->colmap) {
649             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
653           data--;
654 #else
655           data = baij->colmap[idxn[j]/bs]-1;
656 #endif
657           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
658           else {
659             col  = data + idxn[j]%bs;
660             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal * norm)669 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
670 {
671   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
672   PetscErrorCode ierr;
673   PetscReal      sum[2],*lnorm2;
674 
675   PetscFunctionBegin;
676   if (baij->size == 1) {
677     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
678   } else {
679     if (type == NORM_FROBENIUS) {
680       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
681       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
682       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
683       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
684       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
685       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
686       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
687       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
688     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
689       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
690       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
691       PetscReal    *rsum,*rsum2,vabs;
692       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
693       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
694       MatScalar    *v;
695 
696       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
697       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
698       /* Amat */
699       v = amat->a; jj = amat->j;
700       for (brow=0; brow<mbs; brow++) {
701         grow = bs*(rstart + brow);
702         nz   = amat->i[brow+1] - amat->i[brow];
703         for (bcol=0; bcol<nz; bcol++) {
704           gcol = bs*(rstart + *jj); jj++;
705           for (col=0; col<bs; col++) {
706             for (row=0; row<bs; row++) {
707               vabs            = PetscAbsScalar(*v); v++;
708               rsum[gcol+col] += vabs;
709               /* non-diagonal block */
710               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
711             }
712           }
713         }
714         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
715       }
716       /* Bmat */
717       v = bmat->a; jj = bmat->j;
718       for (brow=0; brow<mbs; brow++) {
719         grow = bs*(rstart + brow);
720         nz = bmat->i[brow+1] - bmat->i[brow];
721         for (bcol=0; bcol<nz; bcol++) {
722           gcol = bs*garray[*jj]; jj++;
723           for (col=0; col<bs; col++) {
724             for (row=0; row<bs; row++) {
725               vabs            = PetscAbsScalar(*v); v++;
726               rsum[gcol+col] += vabs;
727               rsum[grow+row] += vabs;
728             }
729           }
730         }
731         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
732       }
733       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
734       *norm = 0.0;
735       for (col=0; col<mat->cmap->N; col++) {
736         if (rsum2[col] > *norm) *norm = rsum2[col];
737       }
738       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
739     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
740   }
741   PetscFunctionReturn(0);
742 }
743 
MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       nstash,reallocs;
749 
750   PetscFunctionBegin;
751   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
752 
753   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
754   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
755   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
756   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
757   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
758   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)762 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
763 {
764   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
765   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
766   PetscErrorCode ierr;
767   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
768   PetscInt       *row,*col;
769   PetscBool      other_disassembled;
770   PetscMPIInt    n;
771   PetscBool      r1,r2,r3;
772   MatScalar      *val;
773 
774   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
775   PetscFunctionBegin;
776   if (!baij->donotstash &&  !mat->nooffprocentries) {
777     while (1) {
778       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
779       if (!flg) break;
780 
781       for (i=0; i<n;) {
782         /* Now identify the consecutive vals belonging to the same row */
783         for (j=i,rstart=row[j]; j<n; j++) {
784           if (row[j] != rstart) break;
785         }
786         if (j < n) ncols = j-i;
787         else       ncols = n-i;
788         /* Now assemble all these values with a single function call */
789         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
790         i    = j;
791       }
792     }
793     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
794     /* Now process the block-stash. Since the values are stashed column-oriented,
795        set the roworiented flag to column oriented, and after MatSetValues()
796        restore the original flags */
797     r1 = baij->roworiented;
798     r2 = a->roworiented;
799     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
800 
801     baij->roworiented = PETSC_FALSE;
802     a->roworiented    = PETSC_FALSE;
803 
804     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
805     while (1) {
806       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
807       if (!flg) break;
808 
809       for (i=0; i<n;) {
810         /* Now identify the consecutive vals belonging to the same row */
811         for (j=i,rstart=row[j]; j<n; j++) {
812           if (row[j] != rstart) break;
813         }
814         if (j < n) ncols = j-i;
815         else       ncols = n-i;
816         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
817         i    = j;
818       }
819     }
820     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
821 
822     baij->roworiented = r1;
823     a->roworiented    = r2;
824 
825     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
826   }
827 
828   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
829   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
830 
831   /* determine if any processor has disassembled, if so we must
832      also disassemble ourselfs, in order that we may reassemble. */
833   /*
834      if nonzero structure of submatrix B cannot change then we know that
835      no processor disassembled thus we can skip this stuff
836   */
837   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
838     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
839     if (mat->was_assembled && !other_disassembled) {
840       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
841     }
842   }
843 
844   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
845     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
846   }
847   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
848   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
849 
850   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
851 
852   baij->rowvalues = NULL;
853 
854   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
855   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
856     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
857     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
858   }
859   PetscFunctionReturn(0);
860 }
861 
862 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
863 #include <petscdraw.h>
MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)864 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
865 {
866   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
867   PetscErrorCode    ierr;
868   PetscInt          bs   = mat->rmap->bs;
869   PetscMPIInt       rank = baij->rank;
870   PetscBool         iascii,isdraw;
871   PetscViewer       sviewer;
872   PetscViewerFormat format;
873 
874   PetscFunctionBegin;
875   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
876   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
877   if (iascii) {
878     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
879     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
880       MatInfo info;
881       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
882       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
883       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
884       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
885       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
886       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
887       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
889       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
890       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
891       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
892       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
893       PetscFunctionReturn(0);
894     } else if (format == PETSC_VIEWER_ASCII_INFO) {
895       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
896       PetscFunctionReturn(0);
897     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
898       PetscFunctionReturn(0);
899     }
900   }
901 
902   if (isdraw) {
903     PetscDraw draw;
904     PetscBool isnull;
905     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
906     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
907     if (isnull) PetscFunctionReturn(0);
908   }
909 
910   {
911     /* assemble the entire matrix onto first processor. */
912     Mat          A;
913     Mat_SeqSBAIJ *Aloc;
914     Mat_SeqBAIJ  *Bloc;
915     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
916     MatScalar    *a;
917     const char   *matname;
918 
919     /* Should this be the same type as mat? */
920     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
921     if (!rank) {
922       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
923     } else {
924       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
925     }
926     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
927     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
928     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
929     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
930 
931     /* copy over the A part */
932     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
933     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
934     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
935 
936     for (i=0; i<mbs; i++) {
937       rvals[0] = bs*(baij->rstartbs + i);
938       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
939       for (j=ai[i]; j<ai[i+1]; j++) {
940         col = (baij->cstartbs+aj[j])*bs;
941         for (k=0; k<bs; k++) {
942           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
943           col++;
944           a += bs;
945         }
946       }
947     }
948     /* copy over the B part */
949     Bloc = (Mat_SeqBAIJ*)baij->B->data;
950     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
951     for (i=0; i<mbs; i++) {
952 
953       rvals[0] = bs*(baij->rstartbs + i);
954       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
955       for (j=ai[i]; j<ai[i+1]; j++) {
956         col = baij->garray[aj[j]]*bs;
957         for (k=0; k<bs; k++) {
958           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
959           col++;
960           a += bs;
961         }
962       }
963     }
964     ierr = PetscFree(rvals);CHKERRQ(ierr);
965     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
966     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
967     /*
968        Everyone has to call to draw the matrix since the graphics waits are
969        synchronized across all processors that share the PetscDraw object
970     */
971     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
972     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
973     if (!rank) {
974       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
975       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
976     }
977     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
978     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
979     ierr = MatDestroy(&A);CHKERRQ(ierr);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 /* Used for both MPIBAIJ and MPISBAIJ matrices */
985 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
986 
MatView_MPISBAIJ(Mat mat,PetscViewer viewer)987 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
988 {
989   PetscErrorCode ierr;
990   PetscBool      iascii,isdraw,issocket,isbinary;
991 
992   PetscFunctionBegin;
993   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
994   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
995   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
996   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
997   if (iascii || isdraw || issocket) {
998     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
999   } else if (isbinary) {
1000     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 
MatDestroy_MPISBAIJ(Mat mat)1005 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1006 {
1007   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1008   PetscErrorCode ierr;
1009 
1010   PetscFunctionBegin;
1011 #if defined(PETSC_USE_LOG)
1012   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1013 #endif
1014   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1015   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1016   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1017   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1018 #if defined(PETSC_USE_CTABLE)
1019   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1020 #else
1021   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1022 #endif
1023   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1024   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1025   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1026   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1027   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1028   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1029   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1030   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1031   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1032   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1033   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1034   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1035   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1036   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1037   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1038 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1039   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1040 #endif
1041   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1042   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1043   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1044   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1045 
1046   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1047   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1048   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1049   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1050   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1051 #if defined(PETSC_HAVE_ELEMENTAL)
1052   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1053 #endif
1054 #if defined(PETSC_HAVE_SCALAPACK)
1055   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr);
1056 #endif
1057   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1058   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1059   PetscFunctionReturn(0);
1060 }
1061 
MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)1062 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1063 {
1064   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1065   PetscErrorCode    ierr;
1066   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1067   PetscScalar       *from;
1068   const PetscScalar *x;
1069 
1070   PetscFunctionBegin;
1071   /* diagonal part */
1072   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1073   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1074 
1075   /* subdiagonal part */
1076   if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
1077   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1078 
1079   /* copy x into the vec slvec0 */
1080   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1081   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1082 
1083   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1084   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1085   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1086 
1087   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089   /* supperdiagonal part */
1090   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)1094 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1095 {
1096   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1097   PetscErrorCode    ierr;
1098   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1099   PetscScalar       *from;
1100   const PetscScalar *x;
1101 
1102   PetscFunctionBegin;
1103   /* diagonal part */
1104   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1105   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1106 
1107   /* subdiagonal part */
1108   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1109 
1110   /* copy x into the vec slvec0 */
1111   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1112   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1113 
1114   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1115   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1116   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1117 
1118   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120   /* supperdiagonal part */
1121   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)1125 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1126 {
1127   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1128   PetscErrorCode    ierr;
1129   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1130   PetscScalar       *from,zero=0.0;
1131   const PetscScalar *x;
1132 
1133   PetscFunctionBegin;
1134   /* diagonal part */
1135   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1136   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1137 
1138   /* subdiagonal part */
1139   if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
1140   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1141 
1142   /* copy x into the vec slvec0 */
1143   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1144   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1145   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1146   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1147 
1148   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1150   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1151 
1152   /* supperdiagonal part */
1153   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)1157 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1158 {
1159   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1160   PetscErrorCode    ierr;
1161   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1162   PetscScalar       *from,zero=0.0;
1163   const PetscScalar *x;
1164 
1165   PetscFunctionBegin;
1166   /* diagonal part */
1167   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1168   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1169 
1170   /* subdiagonal part */
1171   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1172 
1173   /* copy x into the vec slvec0 */
1174   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1175   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1176   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1177   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1178 
1179   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1180   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1181   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182 
1183   /* supperdiagonal part */
1184   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 /*
1189   This only works correctly for square matrices where the subblock A->A is the
1190    diagonal block
1191 */
MatGetDiagonal_MPISBAIJ(Mat A,Vec v)1192 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1193 {
1194   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1199   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1200   PetscFunctionReturn(0);
1201 }
1202 
MatScale_MPISBAIJ(Mat A,PetscScalar aa)1203 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1204 {
1205   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1210   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1214 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1215 {
1216   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1217   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1218   PetscErrorCode ierr;
1219   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1220   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1221   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1222 
1223   PetscFunctionBegin;
1224   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1225   mat->getrowactive = PETSC_TRUE;
1226 
1227   if (!mat->rowvalues && (idx || v)) {
1228     /*
1229         allocate enough space to hold information from the longest row.
1230     */
1231     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1232     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1233     PetscInt     max = 1,mbs = mat->mbs,tmp;
1234     for (i=0; i<mbs; i++) {
1235       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1236       if (max < tmp) max = tmp;
1237     }
1238     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1239   }
1240 
1241   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1242   lrow = row - brstart;  /* local row index */
1243 
1244   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1245   if (!v)   {pvA = NULL; pvB = NULL;}
1246   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1247   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1248   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1249   nztot = nzA + nzB;
1250 
1251   cmap = mat->garray;
1252   if (v  || idx) {
1253     if (nztot) {
1254       /* Sort by increasing column numbers, assuming A and B already sorted */
1255       PetscInt imark = -1;
1256       if (v) {
1257         *v = v_p = mat->rowvalues;
1258         for (i=0; i<nzB; i++) {
1259           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1260           else break;
1261         }
1262         imark = i;
1263         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1264         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1265       }
1266       if (idx) {
1267         *idx = idx_p = mat->rowindices;
1268         if (imark > -1) {
1269           for (i=0; i<imark; i++) {
1270             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1271           }
1272         } else {
1273           for (i=0; i<nzB; i++) {
1274             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1275             else break;
1276           }
1277           imark = i;
1278         }
1279         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1280         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1281       }
1282     } else {
1283       if (idx) *idx = NULL;
1284       if (v)   *v   = NULL;
1285     }
1286   }
1287   *nz  = nztot;
1288   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1289   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1293 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1294 {
1295   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1296 
1297   PetscFunctionBegin;
1298   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1299   baij->getrowactive = PETSC_FALSE;
1300   PetscFunctionReturn(0);
1301 }
1302 
MatGetRowUpperTriangular_MPISBAIJ(Mat A)1303 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1304 {
1305   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1306   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1307 
1308   PetscFunctionBegin;
1309   aA->getrow_utriangular = PETSC_TRUE;
1310   PetscFunctionReturn(0);
1311 }
MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)1312 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1315   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1316 
1317   PetscFunctionBegin;
1318   aA->getrow_utriangular = PETSC_FALSE;
1319   PetscFunctionReturn(0);
1320 }
1321 
MatRealPart_MPISBAIJ(Mat A)1322 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1323 {
1324   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1329   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
MatImaginaryPart_MPISBAIJ(Mat A)1333 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1334 {
1335   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1340   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1345    Input: isrow       - distributed(parallel),
1346           iscol_local - locally owned (seq)
1347 */
ISEqual_private(IS isrow,IS iscol_local,PetscBool * flg)1348 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1349 {
1350   PetscErrorCode ierr;
1351   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1352   const PetscInt *ptr1,*ptr2;
1353 
1354   PetscFunctionBegin;
1355   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1356   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1357   if (sz1 > sz2) {
1358     *flg = PETSC_FALSE;
1359     PetscFunctionReturn(0);
1360   }
1361 
1362   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1363   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1364 
1365   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1366   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1367   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1368   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1369   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1370   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1371 
1372   nmatch=0;
1373   k     = 0;
1374   for (i=0; i<sz1; i++){
1375     for (j=k; j<sz2; j++){
1376       if (a1[i] == a2[j]) {
1377         k = j; nmatch++;
1378         break;
1379       }
1380     }
1381   }
1382   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1383   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1384   ierr = PetscFree(a1);CHKERRQ(ierr);
1385   ierr = PetscFree(a2);CHKERRQ(ierr);
1386   if (nmatch < sz1) {
1387     *flg = PETSC_FALSE;
1388   } else {
1389     *flg = PETSC_TRUE;
1390   }
1391   PetscFunctionReturn(0);
1392 }
1393 
MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat * newmat)1394 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1395 {
1396   PetscErrorCode ierr;
1397   IS             iscol_local;
1398   PetscInt       csize;
1399   PetscBool      isequal;
1400 
1401   PetscFunctionBegin;
1402   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1403   if (call == MAT_REUSE_MATRIX) {
1404     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1405     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1406   } else {
1407     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1408     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1409     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1410   }
1411 
1412   /* now call MatCreateSubMatrix_MPIBAIJ() */
1413   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1414   if (call == MAT_INITIAL_MATRIX) {
1415     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1416     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
MatZeroEntries_MPISBAIJ(Mat A)1421 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1422 {
1423   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1428   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo * info)1432 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1433 {
1434   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1435   Mat            A  = a->A,B = a->B;
1436   PetscErrorCode ierr;
1437   PetscLogDouble isend[5],irecv[5];
1438 
1439   PetscFunctionBegin;
1440   info->block_size = (PetscReal)matin->rmap->bs;
1441 
1442   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1443 
1444   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1445   isend[3] = info->memory;  isend[4] = info->mallocs;
1446 
1447   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1448 
1449   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1450   isend[3] += info->memory;  isend[4] += info->mallocs;
1451   if (flag == MAT_LOCAL) {
1452     info->nz_used      = isend[0];
1453     info->nz_allocated = isend[1];
1454     info->nz_unneeded  = isend[2];
1455     info->memory       = isend[3];
1456     info->mallocs      = isend[4];
1457   } else if (flag == MAT_GLOBAL_MAX) {
1458     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1459 
1460     info->nz_used      = irecv[0];
1461     info->nz_allocated = irecv[1];
1462     info->nz_unneeded  = irecv[2];
1463     info->memory       = irecv[3];
1464     info->mallocs      = irecv[4];
1465   } else if (flag == MAT_GLOBAL_SUM) {
1466     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1467 
1468     info->nz_used      = irecv[0];
1469     info->nz_allocated = irecv[1];
1470     info->nz_unneeded  = irecv[2];
1471     info->memory       = irecv[3];
1472     info->mallocs      = irecv[4];
1473   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1474   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1475   info->fill_ratio_needed = 0;
1476   info->factor_mallocs    = 0;
1477   PetscFunctionReturn(0);
1478 }
1479 
MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)1480 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1481 {
1482   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1483   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1484   PetscErrorCode ierr;
1485 
1486   PetscFunctionBegin;
1487   switch (op) {
1488   case MAT_NEW_NONZERO_LOCATIONS:
1489   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1490   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1491   case MAT_KEEP_NONZERO_PATTERN:
1492   case MAT_SUBMAT_SINGLEIS:
1493   case MAT_NEW_NONZERO_LOCATION_ERR:
1494     MatCheckPreallocated(A,1);
1495     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1496     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1497     break;
1498   case MAT_ROW_ORIENTED:
1499     MatCheckPreallocated(A,1);
1500     a->roworiented = flg;
1501 
1502     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1503     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1504     break;
1505   case MAT_NEW_DIAGONALS:
1506   case MAT_SORTED_FULL:
1507     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1508     break;
1509   case MAT_IGNORE_OFF_PROC_ENTRIES:
1510     a->donotstash = flg;
1511     break;
1512   case MAT_USE_HASH_TABLE:
1513     a->ht_flag = flg;
1514     break;
1515   case MAT_HERMITIAN:
1516     MatCheckPreallocated(A,1);
1517     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1518 #if defined(PETSC_USE_COMPLEX)
1519     if (flg) { /* need different mat-vec ops */
1520       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1521       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1522       A->ops->multtranspose    = NULL;
1523       A->ops->multtransposeadd = NULL;
1524       A->symmetric = PETSC_FALSE;
1525     }
1526 #endif
1527     break;
1528   case MAT_SPD:
1529   case MAT_SYMMETRIC:
1530     MatCheckPreallocated(A,1);
1531     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1532 #if defined(PETSC_USE_COMPLEX)
1533     if (flg) { /* restore to use default mat-vec ops */
1534       A->ops->mult             = MatMult_MPISBAIJ;
1535       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1536       A->ops->multtranspose    = MatMult_MPISBAIJ;
1537       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1538     }
1539 #endif
1540     break;
1541   case MAT_STRUCTURALLY_SYMMETRIC:
1542     MatCheckPreallocated(A,1);
1543     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1544     break;
1545   case MAT_SYMMETRY_ETERNAL:
1546     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1547     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1548     break;
1549   case MAT_IGNORE_LOWER_TRIANGULAR:
1550     aA->ignore_ltriangular = flg;
1551     break;
1552   case MAT_ERROR_LOWER_TRIANGULAR:
1553     aA->ignore_ltriangular = flg;
1554     break;
1555   case MAT_GETROW_UPPERTRIANGULAR:
1556     aA->getrow_utriangular = flg;
1557     break;
1558   default:
1559     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1560   }
1561   PetscFunctionReturn(0);
1562 }
1563 
MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat * B)1564 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1565 {
1566   PetscErrorCode ierr;
1567 
1568   PetscFunctionBegin;
1569   if (reuse == MAT_INITIAL_MATRIX) {
1570     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1571   }  else if (reuse == MAT_REUSE_MATRIX) {
1572     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1573   }
1574   PetscFunctionReturn(0);
1575 }
1576 
MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)1577 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1578 {
1579   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1580   Mat            a     = baij->A, b=baij->B;
1581   PetscErrorCode ierr;
1582   PetscInt       nv,m,n;
1583   PetscBool      flg;
1584 
1585   PetscFunctionBegin;
1586   if (ll != rr) {
1587     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1588     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1589   }
1590   if (!ll) PetscFunctionReturn(0);
1591 
1592   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1593   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1594 
1595   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1596   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1597 
1598   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1599 
1600   /* left diagonalscale the off-diagonal part */
1601   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1602 
1603   /* scale the diagonal part */
1604   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1605 
1606   /* right diagonalscale the off-diagonal part */
1607   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1609   PetscFunctionReturn(0);
1610 }
1611 
MatSetUnfactored_MPISBAIJ(Mat A)1612 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1613 {
1614   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1615   PetscErrorCode ierr;
1616 
1617   PetscFunctionBegin;
1618   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1623 
MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool * flag)1624 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1625 {
1626   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1627   Mat            a,b,c,d;
1628   PetscBool      flg;
1629   PetscErrorCode ierr;
1630 
1631   PetscFunctionBegin;
1632   a = matA->A; b = matA->B;
1633   c = matB->A; d = matB->B;
1634 
1635   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1636   if (flg) {
1637     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1638   }
1639   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1640   PetscFunctionReturn(0);
1641 }
1642 
MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)1643 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1644 {
1645   PetscErrorCode ierr;
1646   PetscBool      isbaij;
1647 
1648   PetscFunctionBegin;
1649   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1650   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1651   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1652   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1653     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1654     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1655     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1656   } else {
1657     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1658     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1659 
1660     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1661     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1662   }
1663   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 
MatSetUp_MPISBAIJ(Mat A)1667 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1668 {
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 
MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)1676 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1677 {
1678   PetscErrorCode ierr;
1679   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1680   PetscBLASInt   bnz,one=1;
1681   Mat_SeqSBAIJ   *xa,*ya;
1682   Mat_SeqBAIJ    *xb,*yb;
1683 
1684   PetscFunctionBegin;
1685   if (str == SAME_NONZERO_PATTERN) {
1686     PetscScalar alpha = a;
1687     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1688     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1689     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1690     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1691     xb   = (Mat_SeqBAIJ*)xx->B->data;
1692     yb   = (Mat_SeqBAIJ*)yy->B->data;
1693     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1694     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1695     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1696   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1697     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1698     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1699     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1700   } else {
1701     Mat      B;
1702     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1703     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1704     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1705     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1706     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1707     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1708     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1709     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1710     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1711     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1712     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1713     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1714     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1715     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1716     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1717     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1718     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1719     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1720     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1721     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1722   }
1723   PetscFunctionReturn(0);
1724 }
1725 
MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat * B[])1726 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1727 {
1728   PetscErrorCode ierr;
1729   PetscInt       i;
1730   PetscBool      flg;
1731 
1732   PetscFunctionBegin;
1733   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1734   for (i=0; i<n; i++) {
1735     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1736     if (!flg) {
1737       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1738     }
1739   }
1740   PetscFunctionReturn(0);
1741 }
1742 
MatShift_MPISBAIJ(Mat Y,PetscScalar a)1743 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1744 {
1745   PetscErrorCode ierr;
1746   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1747   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1748 
1749   PetscFunctionBegin;
1750   if (!Y->preallocated) {
1751     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1752   } else if (!aij->nz) {
1753     PetscInt nonew = aij->nonew;
1754     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1755     aij->nonew = nonew;
1756   }
1757   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool * missing,PetscInt * d)1761 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1762 {
1763   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1764   PetscErrorCode ierr;
1765 
1766   PetscFunctionBegin;
1767   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1768   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1769   if (d) {
1770     PetscInt rstart;
1771     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1772     *d += rstart/A->rmap->bs;
1773 
1774   }
1775   PetscFunctionReturn(0);
1776 }
1777 
MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat * a)1778 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1779 {
1780   PetscFunctionBegin;
1781   *a = ((Mat_MPISBAIJ*)A->data)->A;
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 /* -------------------------------------------------------------------*/
1786 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1787                                        MatGetRow_MPISBAIJ,
1788                                        MatRestoreRow_MPISBAIJ,
1789                                        MatMult_MPISBAIJ,
1790                                /*  4*/ MatMultAdd_MPISBAIJ,
1791                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1792                                        MatMultAdd_MPISBAIJ,
1793                                        NULL,
1794                                        NULL,
1795                                        NULL,
1796                                /* 10*/ NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        MatSOR_MPISBAIJ,
1800                                        MatTranspose_MPISBAIJ,
1801                                /* 15*/ MatGetInfo_MPISBAIJ,
1802                                        MatEqual_MPISBAIJ,
1803                                        MatGetDiagonal_MPISBAIJ,
1804                                        MatDiagonalScale_MPISBAIJ,
1805                                        MatNorm_MPISBAIJ,
1806                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1807                                        MatAssemblyEnd_MPISBAIJ,
1808                                        MatSetOption_MPISBAIJ,
1809                                        MatZeroEntries_MPISBAIJ,
1810                                /* 24*/ NULL,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        NULL,
1815                                /* 29*/ MatSetUp_MPISBAIJ,
1816                                        NULL,
1817                                        NULL,
1818                                        MatGetDiagonalBlock_MPISBAIJ,
1819                                        NULL,
1820                                /* 34*/ MatDuplicate_MPISBAIJ,
1821                                        NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        NULL,
1825                                /* 39*/ MatAXPY_MPISBAIJ,
1826                                        MatCreateSubMatrices_MPISBAIJ,
1827                                        MatIncreaseOverlap_MPISBAIJ,
1828                                        MatGetValues_MPISBAIJ,
1829                                        MatCopy_MPISBAIJ,
1830                                /* 44*/ NULL,
1831                                        MatScale_MPISBAIJ,
1832                                        MatShift_MPISBAIJ,
1833                                        NULL,
1834                                        NULL,
1835                                /* 49*/ NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                /* 54*/ NULL,
1841                                        NULL,
1842                                        MatSetUnfactored_MPISBAIJ,
1843                                        NULL,
1844                                        MatSetValuesBlocked_MPISBAIJ,
1845                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                /* 64*/ NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1856                                        NULL,
1857                                        MatConvert_MPISBAIJ_Basic,
1858                                        NULL,
1859                                        NULL,
1860                                /* 74*/ NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        NULL,
1864                                        NULL,
1865                                /* 79*/ NULL,
1866                                        NULL,
1867                                        NULL,
1868                                        NULL,
1869                                        MatLoad_MPISBAIJ,
1870                                /* 84*/ NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        NULL,
1875                                /* 89*/ NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                /* 94*/ NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        NULL,
1885                                /* 99*/ NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                /*104*/ NULL,
1891                                        MatRealPart_MPISBAIJ,
1892                                        MatImaginaryPart_MPISBAIJ,
1893                                        MatGetRowUpperTriangular_MPISBAIJ,
1894                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1895                                /*109*/ NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        MatMissingDiagonal_MPISBAIJ,
1900                                /*114*/ NULL,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        NULL,
1905                                /*119*/ NULL,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL,
1909                                        NULL,
1910                                /*124*/ NULL,
1911                                        NULL,
1912                                        NULL,
1913                                        NULL,
1914                                        NULL,
1915                                /*129*/ NULL,
1916                                        NULL,
1917                                        NULL,
1918                                        NULL,
1919                                        NULL,
1920                                /*134*/ NULL,
1921                                        NULL,
1922                                        NULL,
1923                                        NULL,
1924                                        NULL,
1925                                /*139*/ MatSetBlockSizes_Default,
1926                                        NULL,
1927                                        NULL,
1928                                        NULL,
1929                                        NULL,
1930                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1931 };
1932 
MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt * d_nnz,PetscInt o_nz,const PetscInt * o_nnz)1933 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1934 {
1935   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1936   PetscErrorCode ierr;
1937   PetscInt       i,mbs,Mbs;
1938   PetscMPIInt    size;
1939 
1940   PetscFunctionBegin;
1941   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1942   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1943   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1944   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1945   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1946   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1947 
1948   mbs = B->rmap->n/bs;
1949   Mbs = B->rmap->N/bs;
1950   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1951 
1952   B->rmap->bs = bs;
1953   b->bs2      = bs*bs;
1954   b->mbs      = mbs;
1955   b->Mbs      = Mbs;
1956   b->nbs      = B->cmap->n/bs;
1957   b->Nbs      = B->cmap->N/bs;
1958 
1959   for (i=0; i<=b->size; i++) {
1960     b->rangebs[i] = B->rmap->range[i]/bs;
1961   }
1962   b->rstartbs = B->rmap->rstart/bs;
1963   b->rendbs   = B->rmap->rend/bs;
1964 
1965   b->cstartbs = B->cmap->rstart/bs;
1966   b->cendbs   = B->cmap->rend/bs;
1967 
1968 #if defined(PETSC_USE_CTABLE)
1969   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
1970 #else
1971   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
1972 #endif
1973   ierr = PetscFree(b->garray);CHKERRQ(ierr);
1974   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
1975   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
1976   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
1977   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
1978   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
1979   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
1980   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
1981   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
1982 
1983   /* Because the B will have been resized we simply destroy it and create a new one each time */
1984   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1985   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
1986   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1987   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
1988   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1989   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1990 
1991   if (!B->preallocated) {
1992     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1993     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1994     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1995     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1996     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1997   }
1998 
1999   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2000   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2001 
2002   B->preallocated  = PETSC_TRUE;
2003   B->was_assembled = PETSC_FALSE;
2004   B->assembled     = PETSC_FALSE;
2005   PetscFunctionReturn(0);
2006 }
2007 
MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])2008 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2009 {
2010   PetscInt       m,rstart,cend;
2011   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
2012   const PetscInt *JJ    =NULL;
2013   PetscScalar    *values=NULL;
2014   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2015   PetscErrorCode ierr;
2016   PetscBool      nooffprocentries;
2017 
2018   PetscFunctionBegin;
2019   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2020   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2021   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2022   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2023   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2024   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2025   m      = B->rmap->n/bs;
2026   rstart = B->rmap->rstart/bs;
2027   cend   = B->cmap->rend/bs;
2028 
2029   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2030   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2031   for (i=0; i<m; i++) {
2032     nz = ii[i+1] - ii[i];
2033     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2034     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2035     JJ     = jj + ii[i];
2036     bd     = 0;
2037     for (j=0; j<nz; j++) {
2038       if (*JJ >= i + rstart) break;
2039       JJ++;
2040       bd++;
2041     }
2042     d  = 0;
2043     for (; j<nz; j++) {
2044       if (*JJ++ >= cend) break;
2045       d++;
2046     }
2047     d_nnz[i] = d;
2048     o_nnz[i] = nz - d - bd;
2049     nz       = nz - bd;
2050     nz_max = PetscMax(nz_max,nz);
2051   }
2052   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2053   ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2054   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2055 
2056   values = (PetscScalar*)V;
2057   if (!values) {
2058     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2059   }
2060   for (i=0; i<m; i++) {
2061     PetscInt          row    = i + rstart;
2062     PetscInt          ncols  = ii[i+1] - ii[i];
2063     const PetscInt    *icols = jj + ii[i];
2064     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2065       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2066       ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2067     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2068       PetscInt j;
2069       for (j=0; j<ncols; j++) {
2070         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2071         ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2072       }
2073     }
2074   }
2075 
2076   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2077   nooffprocentries    = B->nooffprocentries;
2078   B->nooffprocentries = PETSC_TRUE;
2079   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   B->nooffprocentries = nooffprocentries;
2082 
2083   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 /*MC
2088    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2089    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2090    the matrix is stored.
2091 
2092    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2093    can call MatSetOption(Mat, MAT_HERMITIAN);
2094 
2095    Options Database Keys:
2096 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2097 
2098    Notes:
2099      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2100      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2101 
2102    Level: beginner
2103 
2104 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2105 M*/
2106 
MatCreate_MPISBAIJ(Mat B)2107 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2108 {
2109   Mat_MPISBAIJ   *b;
2110   PetscErrorCode ierr;
2111   PetscBool      flg = PETSC_FALSE;
2112 
2113   PetscFunctionBegin;
2114   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2115   B->data = (void*)b;
2116   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2117 
2118   B->ops->destroy = MatDestroy_MPISBAIJ;
2119   B->ops->view    = MatView_MPISBAIJ;
2120   B->assembled    = PETSC_FALSE;
2121   B->insertmode   = NOT_SET_VALUES;
2122 
2123   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2124   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2125 
2126   /* build local table of row and column ownerships */
2127   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2128 
2129   /* build cache for off array entries formed */
2130   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2131 
2132   b->donotstash  = PETSC_FALSE;
2133   b->colmap      = NULL;
2134   b->garray      = NULL;
2135   b->roworiented = PETSC_TRUE;
2136 
2137   /* stuff used in block assembly */
2138   b->barray = NULL;
2139 
2140   /* stuff used for matrix vector multiply */
2141   b->lvec    = NULL;
2142   b->Mvctx   = NULL;
2143   b->slvec0  = NULL;
2144   b->slvec0b = NULL;
2145   b->slvec1  = NULL;
2146   b->slvec1a = NULL;
2147   b->slvec1b = NULL;
2148   b->sMvctx  = NULL;
2149 
2150   /* stuff for MatGetRow() */
2151   b->rowindices   = NULL;
2152   b->rowvalues    = NULL;
2153   b->getrowactive = PETSC_FALSE;
2154 
2155   /* hash table stuff */
2156   b->ht           = NULL;
2157   b->hd           = NULL;
2158   b->ht_size      = 0;
2159   b->ht_flag      = PETSC_FALSE;
2160   b->ht_fact      = 0;
2161   b->ht_total_ct  = 0;
2162   b->ht_insert_ct = 0;
2163 
2164   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2165   b->ijonly = PETSC_FALSE;
2166 
2167   b->in_loc = NULL;
2168   b->v_loc  = NULL;
2169   b->n_loc  = 0;
2170 
2171   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2172   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2173   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2174   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2175 #if defined(PETSC_HAVE_ELEMENTAL)
2176   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2177 #endif
2178 #if defined(PETSC_HAVE_SCALAPACK)
2179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr);
2180 #endif
2181   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2183 
2184   B->symmetric                  = PETSC_TRUE;
2185   B->structurally_symmetric     = PETSC_TRUE;
2186   B->symmetric_set              = PETSC_TRUE;
2187   B->structurally_symmetric_set = PETSC_TRUE;
2188   B->symmetric_eternal          = PETSC_TRUE;
2189 #if defined(PETSC_USE_COMPLEX)
2190   B->hermitian                  = PETSC_FALSE;
2191   B->hermitian_set              = PETSC_FALSE;
2192 #else
2193   B->hermitian                  = PETSC_TRUE;
2194   B->hermitian_set              = PETSC_TRUE;
2195 #endif
2196 
2197   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2198   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2199   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2200   if (flg) {
2201     PetscReal fact = 1.39;
2202     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2203     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2204     if (fact <= 1.0) fact = 1.39;
2205     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2206     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2207   }
2208   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 /*MC
2213    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2214 
2215    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2216    and MATMPISBAIJ otherwise.
2217 
2218    Options Database Keys:
2219 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2220 
2221   Level: beginner
2222 
2223 .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ
2224 M*/
2225 
2226 /*@C
2227    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2228    the user should preallocate the matrix storage by setting the parameters
2229    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2230    performance can be increased by more than a factor of 50.
2231 
2232    Collective on Mat
2233 
2234    Input Parameters:
2235 +  B - the matrix
2236 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2237           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2238 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2239            submatrix  (same for all local rows)
2240 .  d_nnz - array containing the number of block nonzeros in the various block rows
2241            in the upper triangular and diagonal part of the in diagonal portion of the local
2242            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2243            for the diagonal entry and set a value even if it is zero.
2244 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2245            submatrix (same for all local rows).
2246 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2247            off-diagonal portion of the local submatrix that is right of the diagonal
2248            (possibly different for each block row) or NULL.
2249 
2250 
2251    Options Database Keys:
2252 +   -mat_no_unroll - uses code that does not unroll the loops in the
2253                      block calculations (much slower)
2254 -   -mat_block_size - size of the blocks to use
2255 
2256    Notes:
2257 
2258    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2259    than it must be used on all processors that share the object for that argument.
2260 
2261    If the *_nnz parameter is given then the *_nz parameter is ignored
2262 
2263    Storage Information:
2264    For a square global matrix we define each processor's diagonal portion
2265    to be its local rows and the corresponding columns (a square submatrix);
2266    each processor's off-diagonal portion encompasses the remainder of the
2267    local matrix (a rectangular submatrix).
2268 
2269    The user can specify preallocated storage for the diagonal part of
2270    the local submatrix with either d_nz or d_nnz (not both).  Set
2271    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2272    memory allocation.  Likewise, specify preallocated storage for the
2273    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2274 
2275    You can call MatGetInfo() to get information on how effective the preallocation was;
2276    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2277    You can also run with the option -info and look for messages with the string
2278    malloc in them to see if additional memory allocation was needed.
2279 
2280    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2281    the figure below we depict these three local rows and all columns (0-11).
2282 
2283 .vb
2284            0 1 2 3 4 5 6 7 8 9 10 11
2285           --------------------------
2286    row 3  |. . . d d d o o o o  o  o
2287    row 4  |. . . d d d o o o o  o  o
2288    row 5  |. . . d d d o o o o  o  o
2289           --------------------------
2290 .ve
2291 
2292    Thus, any entries in the d locations are stored in the d (diagonal)
2293    submatrix, and any entries in the o locations are stored in the
2294    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2295    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2296 
2297    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2298    plus the diagonal part of the d matrix,
2299    and o_nz should indicate the number of block nonzeros per row in the o matrix
2300 
2301    In general, for PDE problems in which most nonzeros are near the diagonal,
2302    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2303    or you will get TERRIBLE performance; see the users' manual chapter on
2304    matrices.
2305 
2306    Level: intermediate
2307 
2308 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2309 @*/
MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])2310 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2311 {
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2316   PetscValidType(B,1);
2317   PetscValidLogicalCollectiveInt(B,bs,2);
2318   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 /*@C
2323    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2324    (block compressed row).  For good matrix assembly performance
2325    the user should preallocate the matrix storage by setting the parameters
2326    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2327    performance can be increased by more than a factor of 50.
2328 
2329    Collective
2330 
2331    Input Parameters:
2332 +  comm - MPI communicator
2333 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2334           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2335 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2336            This value should be the same as the local size used in creating the
2337            y vector for the matrix-vector product y = Ax.
2338 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2339            This value should be the same as the local size used in creating the
2340            x vector for the matrix-vector product y = Ax.
2341 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2342 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2343 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2344            submatrix  (same for all local rows)
2345 .  d_nnz - array containing the number of block nonzeros in the various block rows
2346            in the upper triangular portion of the in diagonal portion of the local
2347            (possibly different for each block block row) or NULL.
2348            If you plan to factor the matrix you must leave room for the diagonal entry and
2349            set its value even if it is zero.
2350 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2351            submatrix (same for all local rows).
2352 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2353            off-diagonal portion of the local submatrix (possibly different for
2354            each block row) or NULL.
2355 
2356    Output Parameter:
2357 .  A - the matrix
2358 
2359    Options Database Keys:
2360 +   -mat_no_unroll - uses code that does not unroll the loops in the
2361                      block calculations (much slower)
2362 .   -mat_block_size - size of the blocks to use
2363 -   -mat_mpi - use the parallel matrix data structures even on one processor
2364                (defaults to using SeqBAIJ format on one processor)
2365 
2366    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2367    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2368    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2369 
2370    Notes:
2371    The number of rows and columns must be divisible by blocksize.
2372    This matrix type does not support complex Hermitian operation.
2373 
2374    The user MUST specify either the local or global matrix dimensions
2375    (possibly both).
2376 
2377    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2378    than it must be used on all processors that share the object for that argument.
2379 
2380    If the *_nnz parameter is given then the *_nz parameter is ignored
2381 
2382    Storage Information:
2383    For a square global matrix we define each processor's diagonal portion
2384    to be its local rows and the corresponding columns (a square submatrix);
2385    each processor's off-diagonal portion encompasses the remainder of the
2386    local matrix (a rectangular submatrix).
2387 
2388    The user can specify preallocated storage for the diagonal part of
2389    the local submatrix with either d_nz or d_nnz (not both).  Set
2390    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2391    memory allocation.  Likewise, specify preallocated storage for the
2392    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2393 
2394    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2395    the figure below we depict these three local rows and all columns (0-11).
2396 
2397 .vb
2398            0 1 2 3 4 5 6 7 8 9 10 11
2399           --------------------------
2400    row 3  |. . . d d d o o o o  o  o
2401    row 4  |. . . d d d o o o o  o  o
2402    row 5  |. . . d d d o o o o  o  o
2403           --------------------------
2404 .ve
2405 
2406    Thus, any entries in the d locations are stored in the d (diagonal)
2407    submatrix, and any entries in the o locations are stored in the
2408    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2409    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2410 
2411    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2412    plus the diagonal part of the d matrix,
2413    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2414    In general, for PDE problems in which most nonzeros are near the diagonal,
2415    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2416    or you will get TERRIBLE performance; see the users' manual chapter on
2417    matrices.
2418 
2419    Level: intermediate
2420 
2421 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2422 @*/
2423 
MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat * A)2424 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2425 {
2426   PetscErrorCode ierr;
2427   PetscMPIInt    size;
2428 
2429   PetscFunctionBegin;
2430   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2431   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2432   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2433   if (size > 1) {
2434     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2435     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2436   } else {
2437     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2438     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2439   }
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 
MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat * newmat)2444 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2445 {
2446   Mat            mat;
2447   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2448   PetscErrorCode ierr;
2449   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2450   PetscScalar    *array;
2451 
2452   PetscFunctionBegin;
2453   *newmat = NULL;
2454 
2455   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2456   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2457   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2458   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2459   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2460 
2461   mat->factortype   = matin->factortype;
2462   mat->preallocated = PETSC_TRUE;
2463   mat->assembled    = PETSC_TRUE;
2464   mat->insertmode   = NOT_SET_VALUES;
2465 
2466   a      = (Mat_MPISBAIJ*)mat->data;
2467   a->bs2 = oldmat->bs2;
2468   a->mbs = oldmat->mbs;
2469   a->nbs = oldmat->nbs;
2470   a->Mbs = oldmat->Mbs;
2471   a->Nbs = oldmat->Nbs;
2472 
2473   a->size         = oldmat->size;
2474   a->rank         = oldmat->rank;
2475   a->donotstash   = oldmat->donotstash;
2476   a->roworiented  = oldmat->roworiented;
2477   a->rowindices   = NULL;
2478   a->rowvalues    = NULL;
2479   a->getrowactive = PETSC_FALSE;
2480   a->barray       = NULL;
2481   a->rstartbs     = oldmat->rstartbs;
2482   a->rendbs       = oldmat->rendbs;
2483   a->cstartbs     = oldmat->cstartbs;
2484   a->cendbs       = oldmat->cendbs;
2485 
2486   /* hash table stuff */
2487   a->ht           = NULL;
2488   a->hd           = NULL;
2489   a->ht_size      = 0;
2490   a->ht_flag      = oldmat->ht_flag;
2491   a->ht_fact      = oldmat->ht_fact;
2492   a->ht_total_ct  = 0;
2493   a->ht_insert_ct = 0;
2494 
2495   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
2496   if (oldmat->colmap) {
2497 #if defined(PETSC_USE_CTABLE)
2498     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2499 #else
2500     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2501     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2502     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
2503 #endif
2504   } else a->colmap = NULL;
2505 
2506   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2507     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2508     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2509     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
2510   } else a->garray = NULL;
2511 
2512   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2513   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2514   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2515   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2516   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2517 
2518   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2519   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2520   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2521   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2522 
2523   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2524   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2525   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2526   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2527   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2528   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2529   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2530   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2531   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2532   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2533   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2534   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2535   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2536 
2537   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2538   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2539   a->sMvctx = oldmat->sMvctx;
2540   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2541 
2542   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2543   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2544   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2545   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2546   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2547   *newmat = mat;
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2552 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2553 
MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)2554 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2555 {
2556   PetscErrorCode ierr;
2557   PetscBool      isbinary;
2558 
2559   PetscFunctionBegin;
2560   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2561   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2562   ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 /*XXXXX@
2567    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2568 
2569    Input Parameters:
2570 .  mat  - the matrix
2571 .  fact - factor
2572 
2573    Not Collective on Mat, each process can have a different hash factor
2574 
2575    Level: advanced
2576 
2577   Notes:
2578    This can also be set by the command line option: -mat_use_hash_table fact
2579 
2580 .seealso: MatSetOption()
2581 @XXXXX*/
2582 
2583 
MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])2584 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2585 {
2586   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2587   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2588   PetscReal      atmp;
2589   PetscReal      *work,*svalues,*rvalues;
2590   PetscErrorCode ierr;
2591   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2592   PetscMPIInt    rank,size;
2593   PetscInt       *rowners_bs,dest,count,source;
2594   PetscScalar    *va;
2595   MatScalar      *ba;
2596   MPI_Status     stat;
2597 
2598   PetscFunctionBegin;
2599   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2600   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2601   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2602 
2603   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2604   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2605 
2606   bs  = A->rmap->bs;
2607   mbs = a->mbs;
2608   Mbs = a->Mbs;
2609   ba  = b->a;
2610   bi  = b->i;
2611   bj  = b->j;
2612 
2613   /* find ownerships */
2614   rowners_bs = A->rmap->range;
2615 
2616   /* each proc creates an array to be distributed */
2617   ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2618 
2619   /* row_max for B */
2620   if (rank != size-1) {
2621     for (i=0; i<mbs; i++) {
2622       ncols = bi[1] - bi[0]; bi++;
2623       brow  = bs*i;
2624       for (j=0; j<ncols; j++) {
2625         bcol = bs*(*bj);
2626         for (kcol=0; kcol<bs; kcol++) {
2627           col  = bcol + kcol;                /* local col index */
2628           col += rowners_bs[rank+1];      /* global col index */
2629           for (krow=0; krow<bs; krow++) {
2630             atmp = PetscAbsScalar(*ba); ba++;
2631             row  = brow + krow;   /* local row index */
2632             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2633             if (work[col] < atmp) work[col] = atmp;
2634           }
2635         }
2636         bj++;
2637       }
2638     }
2639 
2640     /* send values to its owners */
2641     for (dest=rank+1; dest<size; dest++) {
2642       svalues = work + rowners_bs[dest];
2643       count   = rowners_bs[dest+1]-rowners_bs[dest];
2644       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2645     }
2646   }
2647 
2648   /* receive values */
2649   if (rank) {
2650     rvalues = work;
2651     count   = rowners_bs[rank+1]-rowners_bs[rank];
2652     for (source=0; source<rank; source++) {
2653       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2654       /* process values */
2655       for (i=0; i<count; i++) {
2656         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2657       }
2658     }
2659   }
2660 
2661   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2662   ierr = PetscFree(work);CHKERRQ(ierr);
2663   PetscFunctionReturn(0);
2664 }
2665 
MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)2666 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2667 {
2668   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2669   PetscErrorCode    ierr;
2670   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2671   PetscScalar       *x,*ptr,*from;
2672   Vec               bb1;
2673   const PetscScalar *b;
2674 
2675   PetscFunctionBegin;
2676   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2677   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2678 
2679   if (flag == SOR_APPLY_UPPER) {
2680     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2681     PetscFunctionReturn(0);
2682   }
2683 
2684   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2685     if (flag & SOR_ZERO_INITIAL_GUESS) {
2686       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2687       its--;
2688     }
2689 
2690     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2691     while (its--) {
2692 
2693       /* lower triangular part: slvec0b = - B^T*xx */
2694       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2695 
2696       /* copy xx into slvec0a */
2697       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2698       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2699       ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
2700       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2701 
2702       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2703 
2704       /* copy bb into slvec1a */
2705       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2706       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2707       ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
2708       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2709 
2710       /* set slvec1b = 0 */
2711       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2712 
2713       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2714       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2715       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2716       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717 
2718       /* upper triangular part: bb1 = bb1 - B*x */
2719       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2720 
2721       /* local diagonal sweep */
2722       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2723     }
2724     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2725   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2726     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2727   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2728     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2729   } else if (flag & SOR_EISENSTAT) {
2730     Vec               xx1;
2731     PetscBool         hasop;
2732     const PetscScalar *diag;
2733     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2734     PetscInt          i,n;
2735 
2736     if (!mat->xx1) {
2737       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2738       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2739     }
2740     xx1 = mat->xx1;
2741     bb1 = mat->bb1;
2742 
2743     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2744 
2745     if (!mat->diag) {
2746       /* this is wrong for same matrix with new nonzero values */
2747       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2748       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2749     }
2750     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2751 
2752     if (hasop) {
2753       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2754       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2755     } else {
2756       /*
2757           These two lines are replaced by code that may be a bit faster for a good compiler
2758       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2759       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2760       */
2761       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2762       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2763       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2764       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2765       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2766       if (omega == 1.0) {
2767         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2768         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2769       } else {
2770         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2771         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2772       }
2773       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2774       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2775       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2776       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2777     }
2778 
2779     /* multiply off-diagonal portion of matrix */
2780     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2781     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2782     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2783     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2784     ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
2785     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2786     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2787     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2788     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2789     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2790 
2791     /* local sweep */
2792     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2793     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2794   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 /*@
2799      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2800          CSR format the local rows.
2801 
2802    Collective
2803 
2804    Input Parameters:
2805 +  comm - MPI communicator
2806 .  bs - the block size, only a block size of 1 is supported
2807 .  m - number of local rows (Cannot be PETSC_DECIDE)
2808 .  n - This value should be the same as the local size used in creating the
2809        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2810        calculated if N is given) For square matrices n is almost always m.
2811 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2812 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2813 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2814 .   j - column indices
2815 -   a - matrix values
2816 
2817    Output Parameter:
2818 .   mat - the matrix
2819 
2820    Level: intermediate
2821 
2822    Notes:
2823        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2824      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2825      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2826 
2827        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2828 
2829 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2830           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2831 @*/
MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat * mat)2832 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2833 {
2834   PetscErrorCode ierr;
2835 
2836 
2837   PetscFunctionBegin;
2838   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2839   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2840   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2841   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2842   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2843   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2844   PetscFunctionReturn(0);
2845 }
2846 
2847 
2848 /*@C
2849    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2850 
2851    Collective
2852 
2853    Input Parameters:
2854 +  B - the matrix
2855 .  bs - the block size
2856 .  i - the indices into j for the start of each local row (starts with zero)
2857 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2858 -  v - optional values in the matrix
2859 
2860    Level: advanced
2861 
2862    Notes:
2863    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2864    and usually the numerical values as well
2865 
2866    Any entries below the diagonal are ignored
2867 
2868 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2869 @*/
MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])2870 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2871 {
2872   PetscErrorCode ierr;
2873 
2874   PetscFunctionBegin;
2875   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2876   PetscFunctionReturn(0);
2877 }
2878 
MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)2879 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2880 {
2881   PetscErrorCode ierr;
2882   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2883   PetscInt       *indx;
2884   PetscScalar    *values;
2885 
2886   PetscFunctionBegin;
2887   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2888   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2889     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2890     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2891     PetscInt       *bindx,rmax=a->rmax,j;
2892     PetscMPIInt    rank,size;
2893 
2894     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2895     mbs = m/bs; Nbs = N/cbs;
2896     if (n == PETSC_DECIDE) {
2897       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2898     }
2899     nbs = n/cbs;
2900 
2901     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2902     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2903 
2904     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2905     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
2906     if (rank == size-1) {
2907       /* Check sum(nbs) = Nbs */
2908       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2909     }
2910 
2911     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2912     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2913     for (i=0; i<mbs; i++) {
2914       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2915       nnz  = nnz/bs;
2916       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2917       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2918       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2919     }
2920     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2921     ierr = PetscFree(bindx);CHKERRQ(ierr);
2922 
2923     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2924     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2925     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2926     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
2927     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
2928     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2929     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2930   }
2931 
2932   /* numeric phase */
2933   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2934   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2935 
2936   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2937   for (i=0; i<m; i++) {
2938     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2939     Ii   = i + rstart;
2940     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2941     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2942   }
2943   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2944   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2945   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948