1
2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13
14 /* This could be moved to matimpl.h */
MatPreallocateWithMats_Private(Mat B,PetscInt nm,Mat X[],PetscBool symm[],PetscBool fill)15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17 Mat preallocator;
18 PetscInt r,rstart,rend;
19 PetscInt bs,i,m,n,M,N;
20 PetscBool cong = PETSC_TRUE;
21 PetscErrorCode ierr;
22
23 PetscFunctionBegin;
24 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
25 PetscValidLogicalCollectiveInt(B,nm,2);
26 for (i = 0; i < nm; i++) {
27 PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
28 ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
29 if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
30 }
31 PetscValidLogicalCollectiveBool(B,fill,5);
32 ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
33 ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
34 ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
35 ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
36 ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
37 ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
38 ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
39 ierr = MatSetUp(preallocator);CHKERRQ(ierr);
40 ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
41 for (r = rstart; r < rend; ++r) {
42 PetscInt ncols;
43 const PetscInt *row;
44 const PetscScalar *vals;
45
46 for (i = 0; i < nm; i++) {
47 ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
48 ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
49 if (symm && symm[i]) {
50 ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
51 }
52 ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
53 }
54 }
55 ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
56 ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
57 ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
58 ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
59 PetscFunctionReturn(0);
60 }
61
/* Convert an MPISBAIJ matrix A (which stores only the upper triangle) into a
   full matrix of type newtype by explicitly inserting both triangles.
   - MAT_INITIAL_MATRIX / MAT_INPLACE_MATRIX: a new matrix B is created; unless
     newtype is a dense type it is preallocated from the symmetrized pattern of A.
   - MAT_REUSE_MATRIX: *newmat is zeroed and refilled.
   With complex scalars and A->hermitian set, mirrored entries are conjugated. */
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,newtype);CHKERRQ(ierr);
    ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
    /* dense targets need no sparse preallocation, only MatSetUp() */
    ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
    if (!isdense) {
      /* preallocate from A's pattern plus its transpose (symm == PETSC_TRUE) */
      ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
      ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
    } else {
      ierr = MatSetUp(B);CHKERRQ(ierr);
    }
  } else {
    B    = *newmat;
    ierr = MatZeroEntries(B);CHKERRQ(ierr);
  }

  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
    /* insert the stored (upper-triangular) entries of row r */
    ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      PetscInt i;
      /* mirror with conjugation: B(c,r) = conj(A(r,c)) */
      for (i = 0; i < ncols; i++) {
        ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
      }
    } else {
      ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
#else
    /* mirror the row into the corresponding column to fill the lower triangle */
    ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
#endif
    ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
126
MatStoreValues_MPISBAIJ(Mat mat)127 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
128 {
129 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
130 PetscErrorCode ierr;
131
132 PetscFunctionBegin;
133 ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
134 ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
135 PetscFunctionReturn(0);
136 }
137
MatRetrieveValues_MPISBAIJ(Mat mat)138 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
139 {
140 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ*)mat->data;
141 PetscErrorCode ierr;
142
143 PetscFunctionBegin;
144 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
145 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
146 PetscFunctionReturn(0);
147 }
148
/*
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol):
   insert (or add when addv == ADD_VALUES) the scalar "value" at local point
   position (row,col) of the diagonal part A (SeqSBAIJ storage).
   (orow,ocol) are the original global indices, used only for error messages.
   Relies on the scratch variables declared in MatSetValues_MPISBAIJ()
   (a, aa, ai, aj, aimax, ailen, bs, bs2, rp, ap, bap, low, high, t, _i, N, ...).
*/
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { /* binary search for block column bcol */ \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already present: update in place */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* new nonzeros silently ignored */ \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
    a_noinsert:; \
    ailen[brow] = nrow; \
  }
185
/*
   MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol):
   same as the _A_ variant above but targets the off-diagonal part B
   (SeqBAIJ storage, arrays b, ba, bi, bj, bimax, bilen).
   (orow,ocol) are the original global indices, used only for error messages.
*/
#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { /* binary search for block column bcol */ \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { /* block already present: update in place */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; /* new nonzeros silently ignored */ \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
    b_noinsert:; \
    bilen[brow] = nrow; \
  }
222
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
/*
   MatSetValues for MPISBAIJ: only (block) upper-triangular entries are stored.
   Lower-triangular input is skipped when a->ignore_ltriangular is set,
   otherwise it raises an error. Locally owned entries go directly into the
   diagonal part A (SeqSBAIJ) or off-diagonal part B (SeqBAIJ) through the
   *_Private() macros above; off-process rows are stashed and communicated at
   assembly time.
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij       = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro MatSetValues_SeqSBAIJ_{A,B}_Private */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  /* scratch written by the macros above */
  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch buffers used to filter lower-triangular entries out of
       a row before it is stashed */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are ignored */
    if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else { /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* map the global block column to B's compressed local numbering */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* block column not yet present in B: disassemble so it can be inserted */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs; /* colmap gives the block start; add the offset within the block */
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* drop lower-triangular entries before stashing the row */
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
339
/*
   Insert/add one bs x bs block of values v at block position (row,col) of a
   SeqSBAIJ matrix; (orow,ocol) are the original global block indices used only
   in error messages. Blocks with col < row belong to the lower triangle and
   are ignored (or raise an error, depending on a->ignore_ltriangular).
   v is read row- or column-oriented according to a->roworiented; storage
   within a block is column-major.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];     /* block-column indices of this block row */
  ap    = aa + bs2*ai[row]; /* values of this block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high-low > 7) { /* binary search for block column col */
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already allocated: update it in place */
      bap = ap + bs2*i;
      if (roworiented) {
        /* v is row-oriented, block storage is column-major: stride through bap */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* v is already column-oriented: copy straight through */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks are silently ignored */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
431
432 /*
433 This routine is exactly duplicated in mpibaij.c
434 */
/*
   Insert/add one bs x bs block of values v at block position (row,col) of a
   SeqBAIJ matrix (used here for the off-diagonal part B, where no triangular
   filtering applies); (orow,ocol) are the original global block indices used
   only in error messages. v is read row- or column-oriented per a->roworiented;
   storage within a block is column-major.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];     /* block-column indices of this block row */
  ap    = aa + bs2*ai[row]; /* values of this block row */
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high-low > 7) { /* binary search for block column col */
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already allocated: update it in place */
      bap = ap + bs2*i;
      if (roworiented) {
        /* v is row-oriented, block storage is column-major */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* v is already column-oriented */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks are silently ignored */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
523
524 /*
525 This routine could be optimized by removing the need for the block copy below and passing stride information
526 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
527 */
/*
   MatSetValuesBlocked for MPISBAIJ: insert/add m x n blocks (each bs x bs;
   im[]/in[] are block indices). Only blocks with im[i] <= in[j] are accepted
   (upper triangle); lower-triangular blocks are skipped when
   MAT_IGNORE_LOWER_TRIANGULAR is set, otherwise they raise an error.
   Locally owned blocks go into baij->A (diagonal part) or baij->B
   (off-diagonal part); off-process block rows are stashed into mat->bstash.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block used to reorder input values */
  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into contiguous scratch storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal part (A) */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else { /* off-diagonal part (B) */
          if (mat->was_assembled) {
            /* map the global block column through colmap (stores point-column+1) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs; /* convert stored point column back to a block column */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* NOTE(review): baij->A holds Mat_SeqSBAIJ data; reading ->nonew through a
               Mat_SeqBAIJ cast assumes the field offsets coincide — confirm upstream */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else { /* off-process block row: stash for assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
627
/*
   MatGetValues for MPISBAIJ: only locally owned rows may be queried.
   Entries in the diagonal column range come from baij->A (SeqSBAIJ); other
   entries are mapped through colmap/garray and fetched from baij->B (SeqBAIJ).
   Columns not present in B's compressed column map are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows skipped: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart; /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns skipped: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal part */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal part: translate the global block column via colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column absent from B: the value is a structural zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* local point column inside the found block */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
668
/*
   Compute matrix norms for MPISBAIJ. Because only the upper triangle is stored:
   - NORM_FROBENIUS: ||M||_F^2 = sum(A part)^2 + 2*sum(B part)^2 reduced over
     all ranks (each stored off-diagonal entry occurs twice in the full matrix).
   - NORM_1 / NORM_INFINITY: identical for a symmetric matrix; computed as the
     maximum column sum, accumulating each stored entry into both its row sum
     and its column sum.
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      /* B entries are stored once but appear twice in the symmetric matrix */
      *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
      /* Amat: walk the stored (upper-triangular) blocks of the diagonal part;
         each entry adds to its column sum and, for off-diagonal blocks, also
         to its row sum by symmetry */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat: every stored off-diagonal entry counts in both its row and
         column sum; garray maps compressed to global block columns */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
743
MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
745 {
746 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
747 PetscErrorCode ierr;
748 PetscInt nstash,reallocs;
749
750 PetscFunctionBegin;
751 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
752
753 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
754 ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
755 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
756 ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
757 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
758 ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
759 PetscFunctionReturn(0);
760 }
761
/*
   Finish assembly: receive and insert stashed off-process entries (scalar
   stash first, then the block stash, whose values arrive column-oriented),
   assemble the local A and B parts, collectively disassemble/reassemble if
   any rank disassembled, and set up the multiply machinery on the first
   final assembly.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    /* NOTE(review): baij->A holds Mat_SeqSBAIJ data; reading ->nonew through a
       Mat_SeqBAIJ cast assumes matching field offsets — confirm upstream */
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
861
862 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
863 #include <petscdraw.h>
/*
   MatView_MPISBAIJ_ASCIIorDraworSocket - Viewer kernel used for ASCII,
   draw and socket viewers.

   ASCII INFO-type formats only print per-process summaries and return
   early.  All other cases gather the entire parallel matrix onto the
   first process and view that copy sequentially; every process must
   participate (the gather and the draw synchronization are collective).
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero counts for the whole matrix and its two blocks */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for this format */
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    /* process 0 receives all rows; every other process owns no rows of the gathered copy */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals = global point-row indices covered by block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block at a time (a advances bs entries per column) */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local block columns of B to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          /* NOTE(review): uses the BAIJ (non-symmetric) SetValues here, unlike
             the SBAIJ variant used for the A part above -- presumably to skip
             the symmetric/lower-triangular filtering; confirm intentional */
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on process 0 the gathered matrix's local diagonal block now holds everything */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
983
984 /* Used for both MPIBAIJ and MPISBAIJ matrices */
985 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
986
MatView_MPISBAIJ(Mat mat,PetscViewer viewer)987 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
988 {
989 PetscErrorCode ierr;
990 PetscBool iascii,isdraw,issocket,isbinary;
991
992 PetscFunctionBegin;
993 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
994 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
995 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
996 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
997 if (iascii || isdraw || issocket) {
998 ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
999 } else if (isbinary) {
1000 ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1001 }
1002 PetscFunctionReturn(0);
1003 }
1004
/*
   MatDestroy_MPISBAIJ - Frees all storage owned by an MPISBAIJ matrix.

   Releases the sequential diagonal (A) and off-diagonal (B) blocks, the
   assembly stashes, the communication scatters and work vectors, and the
   lookup arrays, then unregisters the methods composed on the object at
   creation time.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* stashes buffer off-process entries queued between SetValues and Assembly */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* colmap is a hash table or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* scatters and work vectors used by the matrix-vector products */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  /* MatGetRow() work space (allocated lazily on first use) */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* remove the type name and all methods composed at creation so the
     object can be retyped or cleanly destroyed */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1061
MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)1062 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1063 {
1064 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1065 PetscErrorCode ierr;
1066 PetscInt mbs=a->mbs,bs=A->rmap->bs;
1067 PetscScalar *from;
1068 const PetscScalar *x;
1069
1070 PetscFunctionBegin;
1071 /* diagonal part */
1072 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1073 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1074
1075 /* subdiagonal part */
1076 if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
1077 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1078
1079 /* copy x into the vec slvec0 */
1080 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1081 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1082
1083 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1084 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1085 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1086
1087 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089 /* supperdiagonal part */
1090 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1091 PetscFunctionReturn(0);
1092 }
1093
MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)1094 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1095 {
1096 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1097 PetscErrorCode ierr;
1098 PetscInt mbs=a->mbs,bs=A->rmap->bs;
1099 PetscScalar *from;
1100 const PetscScalar *x;
1101
1102 PetscFunctionBegin;
1103 /* diagonal part */
1104 ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1105 ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1106
1107 /* subdiagonal part */
1108 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1109
1110 /* copy x into the vec slvec0 */
1111 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1112 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1113
1114 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1115 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1116 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1117
1118 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120 /* supperdiagonal part */
1121 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1122 PetscFunctionReturn(0);
1123 }
1124
MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)1125 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1126 {
1127 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1128 PetscErrorCode ierr;
1129 PetscInt mbs=a->mbs,bs=A->rmap->bs;
1130 PetscScalar *from,zero=0.0;
1131 const PetscScalar *x;
1132
1133 PetscFunctionBegin;
1134 /* diagonal part */
1135 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1136 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1137
1138 /* subdiagonal part */
1139 if (!a->B->ops->multhermitiantranspose) SETERRQ1(PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s\n",((PetscObject)a->B)->type_name);
1140 ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1141
1142 /* copy x into the vec slvec0 */
1143 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1144 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1145 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1146 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1147
1148 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1150 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1151
1152 /* supperdiagonal part */
1153 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1154 PetscFunctionReturn(0);
1155 }
1156
MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)1157 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1158 {
1159 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1160 PetscErrorCode ierr;
1161 PetscInt mbs=a->mbs,bs=A->rmap->bs;
1162 PetscScalar *from,zero=0.0;
1163 const PetscScalar *x;
1164
1165 PetscFunctionBegin;
1166 /* diagonal part */
1167 ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1168 ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1169
1170 /* subdiagonal part */
1171 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1172
1173 /* copy x into the vec slvec0 */
1174 ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1175 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1176 ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1177 ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1178
1179 ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1180 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1181 ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182
1183 /* supperdiagonal part */
1184 ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1185 PetscFunctionReturn(0);
1186 }
1187
1188 /*
1189 This only works correctly for square matrices where the subblock A->A is the
1190 diagonal block
1191 */
MatGetDiagonal_MPISBAIJ(Mat A,Vec v)1192 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1193 {
1194 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1195 PetscErrorCode ierr;
1196
1197 PetscFunctionBegin;
1198 /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1199 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1200 PetscFunctionReturn(0);
1201 }
1202
MatScale_MPISBAIJ(Mat A,PetscScalar aa)1203 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1204 {
1205 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1206 PetscErrorCode ierr;
1207
1208 PetscFunctionBegin;
1209 ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1210 ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1211 PetscFunctionReturn(0);
1212 }
1213
/*
   MatGetRow_MPISBAIJ - Returns global row 'row' by merging the entries of
   the local diagonal block (A) and off-diagonal block (B) into one list
   with increasing global column indices.

   Only locally owned rows may be requested.  The returned idx/v arrays
   point into work space attached to the matrix and remain valid until
   MatRestoreRow_MPISBAIJ() is called; the routine is not reentrant
   (guarded by getrowactive).
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  /* NOTE(review): the column start is taken from the row start (rstartbs)
     -- assumes a square, symmetric layout; confirm */
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* request from the blocks only the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps local block columns of B to global block columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column precedes the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i; /* number of leading B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the split point was already determined while copying the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        /* A entries use local columns offset by the diagonal block start */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1292
MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1293 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1294 {
1295 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1296
1297 PetscFunctionBegin;
1298 if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1299 baij->getrowactive = PETSC_FALSE;
1300 PetscFunctionReturn(0);
1301 }
1302
MatGetRowUpperTriangular_MPISBAIJ(Mat A)1303 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1304 {
1305 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1306 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1307
1308 PetscFunctionBegin;
1309 aA->getrow_utriangular = PETSC_TRUE;
1310 PetscFunctionReturn(0);
1311 }
MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)1312 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1313 {
1314 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1315 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1316
1317 PetscFunctionBegin;
1318 aA->getrow_utriangular = PETSC_FALSE;
1319 PetscFunctionReturn(0);
1320 }
1321
MatRealPart_MPISBAIJ(Mat A)1322 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1323 {
1324 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1325 PetscErrorCode ierr;
1326
1327 PetscFunctionBegin;
1328 ierr = MatRealPart(a->A);CHKERRQ(ierr);
1329 ierr = MatRealPart(a->B);CHKERRQ(ierr);
1330 PetscFunctionReturn(0);
1331 }
1332
MatImaginaryPart_MPISBAIJ(Mat A)1333 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1334 {
1335 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1336 PetscErrorCode ierr;
1337
1338 PetscFunctionBegin;
1339 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1340 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1341 PetscFunctionReturn(0);
1342 }
1343
1344 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1345 Input: isrow - distributed(parallel),
1346 iscol_local - locally owned (seq)
1347 */
/*
   ISEqual_private - Determines whether every index of isrow also appears
   in iscol_local, setting *flg accordingly.

   Both index sets are copied and sorted, then a single forward sweep
   counts matches; *flg is PETSC_TRUE only if all sz1 entries of isrow
   were matched.

   NOTE(review): after a match the scan position k is left at j rather
   than j+1, so duplicate values in isrow can match the same entry of
   iscol_local -- confirm duplicates cannot occur in these index sets.
*/
PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool *flg)
{
  PetscErrorCode ierr;
  PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
  const PetscInt *ptr1,*ptr2;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
  /* a strictly larger isrow can never be a subset */
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);

  /* sort private copies so a single merged sweep suffices */
  ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
  ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
  ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
  ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
  ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
  ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);

  nmatch=0;
  k = 0;
  /* both arrays sorted: resume the inner scan from the last match position */
  for (i=0; i<sz1; i++){
    for (j=k; j<sz2; j++){
      if (a1[i] == a2[j]) {
        k = j; nmatch++;
        break;
      }
    }
  }
  ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
  ierr = PetscFree(a1);CHKERRQ(ierr);
  ierr = PetscFree(a2);CHKERRQ(ierr);
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}
1393
/*
   MatCreateSubMatrix_MPISBAIJ - Extracts the (isrow,iscol) submatrix.

   The symmetric format only supports extraction of symmetric submatrices,
   so isrow must be a subset of the gathered iscol; this is checked on the
   initial call and the gathered column IS is cached on the new matrix
   under "ISAllGather" for later reuse.

   NOTE(review): on MAT_REUSE_MATRIX the isrow/iscol compatibility check
   is skipped and the cached IS is used as-is (a borrowed reference that
   is not destroyed here) -- assumes the caller passes the same index
   sets as on the initial call.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* compose keeps a reference, so the local destroy leaves it alive on *newmat */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1420
MatZeroEntries_MPISBAIJ(Mat A)1421 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1422 {
1423 Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1424 PetscErrorCode ierr;
1425
1426 PetscFunctionBegin;
1427 ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1428 ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1429 PetscFunctionReturn(0);
1430 }
1431
MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo * info)1432 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1433 {
1434 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1435 Mat A = a->A,B = a->B;
1436 PetscErrorCode ierr;
1437 PetscLogDouble isend[5],irecv[5];
1438
1439 PetscFunctionBegin;
1440 info->block_size = (PetscReal)matin->rmap->bs;
1441
1442 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1443
1444 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1445 isend[3] = info->memory; isend[4] = info->mallocs;
1446
1447 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1448
1449 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1450 isend[3] += info->memory; isend[4] += info->mallocs;
1451 if (flag == MAT_LOCAL) {
1452 info->nz_used = isend[0];
1453 info->nz_allocated = isend[1];
1454 info->nz_unneeded = isend[2];
1455 info->memory = isend[3];
1456 info->mallocs = isend[4];
1457 } else if (flag == MAT_GLOBAL_MAX) {
1458 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1459
1460 info->nz_used = irecv[0];
1461 info->nz_allocated = irecv[1];
1462 info->nz_unneeded = irecv[2];
1463 info->memory = irecv[3];
1464 info->mallocs = irecv[4];
1465 } else if (flag == MAT_GLOBAL_SUM) {
1466 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1467
1468 info->nz_used = irecv[0];
1469 info->nz_allocated = irecv[1];
1470 info->nz_unneeded = irecv[2];
1471 info->memory = irecv[3];
1472 info->mallocs = irecv[4];
1473 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1474 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1475 info->fill_ratio_needed = 0;
1476 info->factor_mallocs = 0;
1477 PetscFunctionReturn(0);
1478 }
1479
/*
   MatSetOption_MPISBAIJ - Handles matrix options for the MPISBAIJ format.

   Most options are forwarded to the local diagonal (A) and/or
   off-diagonal (B) blocks; a few toggle flags kept on the parallel
   object itself, and the symmetry-related options may swap the mat-vec
   kernels in the operations table.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* forwarded verbatim to both sequential blocks */
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* recorded on the parallel object and forwarded to the blocks */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      /* a Hermitian complex matrix is not symmetric; transpose products are unsupported */
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric = PETSC_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      /* for a symmetric matrix A^T x equals A x, so reuse the forward kernels */
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* the SBAIJ format is symmetric by construction and cannot stop being so */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): sets the same flag as MAT_IGNORE_LOWER_TRIANGULAR -- confirm intentional */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1563
MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat * B)1564 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1565 {
1566 PetscErrorCode ierr;
1567
1568 PetscFunctionBegin;
1569 if (reuse == MAT_INITIAL_MATRIX) {
1570 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1571 } else if (reuse == MAT_REUSE_MATRIX) {
1572 ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1573 }
1574 PetscFunctionReturn(0);
1575 }
1576
/*
   MatDiagonalScale_MPISBAIJ - Computes mat = diag(ll) * mat * diag(rr).

   Symmetric storage only supports a symmetric scaling, so ll and rr must
   be the same vector (or equal element-wise).  The scatter bringing in
   the ghost entries of rr is overlapped with scaling the local blocks.

   NOTE(review): a NULL ll combined with a non-NULL rr reaches VecEqual()
   with a NULL argument -- confirm callers always pass both or neither.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  /* no scaling vectors: nothing to do */
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start fetching the ghost entries of rr needed to scale columns of B */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1611
MatSetUnfactored_MPISBAIJ(Mat A)1612 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1613 {
1614 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1615 PetscErrorCode ierr;
1616
1617 PetscFunctionBegin;
1618 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1619 PetscFunctionReturn(0);
1620 }
1621
1622 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1623
MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool * flag)1624 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool *flag)
1625 {
1626 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1627 Mat a,b,c,d;
1628 PetscBool flg;
1629 PetscErrorCode ierr;
1630
1631 PetscFunctionBegin;
1632 a = matA->A; b = matA->B;
1633 c = matB->A; d = matB->B;
1634
1635 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1636 if (flg) {
1637 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1638 }
1639 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1640 PetscFunctionReturn(0);
1641 }
1642
MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)1643 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1644 {
1645 PetscErrorCode ierr;
1646 PetscBool isbaij;
1647
1648 PetscFunctionBegin;
1649 ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1650 if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1651 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1652 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1653 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1654 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1655 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1656 } else {
1657 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1658 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1659
1660 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1661 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1662 }
1663 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1664 PetscFunctionReturn(0);
1665 }
1666
/* Default MatSetUp(): preallocate using the matrix's current block size and
   PETSC_DEFAULT per-row estimates for both diagonal and off-diagonal parts */
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1675
/*
   MatAXPY_MPISBAIJ - Computes Y = a*X + Y for MPISBAIJ matrices.

   Three cases by nonzero structure:
   - SAME_NONZERO_PATTERN: the value arrays line up entry-for-entry, so a
     single BLAS axpy per sequential piece suffices.
   - SUBSET_NONZERO_PATTERN: fall back to MatAXPY_Basic() with X temporarily
     allowing upper-triangular MatGetRow() access.
   - otherwise: build a new matrix B with the merged preallocation, compute
     the sum into it, and replace Y's internals with B via MatHeaderReplace().
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal (SeqSBAIJ) parts: y->a += alpha * x->a over all stored values */
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    /* off-diagonal (SeqBAIJ) parts */
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    /* MatAXPY_Basic uses MatGetRow(X); enable upper-triangular row access for SBAIJ */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    /* NOTE(review): these arrays are sized by scalar rows (rmap->N of the local
       blocks) though only one entry per block row appears to be used — looks
       like a safe over-allocation; confirm against MatAXPYGetPreallocation_* */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    /* merged per-block-row counts of the union of X's and Y's patterns */
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* B's guts become Y's; B itself is consumed */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1725
MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat * B[])1726 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1727 {
1728 PetscErrorCode ierr;
1729 PetscInt i;
1730 PetscBool flg;
1731
1732 PetscFunctionBegin;
1733 ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1734 for (i=0; i<n; i++) {
1735 ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1736 if (!flg) {
1737 ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1738 }
1739 }
1740 PetscFunctionReturn(0);
1741 }
1742
MatShift_MPISBAIJ(Mat Y,PetscScalar a)1743 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1744 {
1745 PetscErrorCode ierr;
1746 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ*)Y->data;
1747 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ*)maij->A->data;
1748
1749 PetscFunctionBegin;
1750 if (!Y->preallocated) {
1751 ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1752 } else if (!aij->nz) {
1753 PetscInt nonew = aij->nonew;
1754 ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1755 aij->nonew = nonew;
1756 }
1757 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1758 PetscFunctionReturn(0);
1759 }
1760
MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool * missing,PetscInt * d)1761 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool *missing,PetscInt *d)
1762 {
1763 Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1764 PetscErrorCode ierr;
1765
1766 PetscFunctionBegin;
1767 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1768 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1769 if (d) {
1770 PetscInt rstart;
1771 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1772 *d += rstart/A->rmap->bs;
1773
1774 }
1775 PetscFunctionReturn(0);
1776 }
1777
/* Return the local diagonal (SeqSBAIJ) block; the caller receives a borrowed
   reference — no new reference is taken here */
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}
1784
1785 /* -------------------------------------------------------------------*/
/* Dispatch table for MATMPISBAIJ: one entry per slot of struct _MatOps.
   The numeric markers give the slot index of the entry that follows them;
   a NULL entry means that operation is not implemented for this type.
   Note MatMultTranspose/MatMultTransposeAdd (slots 5,6) reuse the
   non-transpose routines since the matrix is symmetric. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /* 4*/  MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};
1932
MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt * d_nnz,PetscInt o_nz,const PetscInt * o_nnz)1933 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1934 {
1935 Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1936 PetscErrorCode ierr;
1937 PetscInt i,mbs,Mbs;
1938 PetscMPIInt size;
1939
1940 PetscFunctionBegin;
1941 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1942 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1943 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1944 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1945 if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1946 if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1947
1948 mbs = B->rmap->n/bs;
1949 Mbs = B->rmap->N/bs;
1950 if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1951
1952 B->rmap->bs = bs;
1953 b->bs2 = bs*bs;
1954 b->mbs = mbs;
1955 b->Mbs = Mbs;
1956 b->nbs = B->cmap->n/bs;
1957 b->Nbs = B->cmap->N/bs;
1958
1959 for (i=0; i<=b->size; i++) {
1960 b->rangebs[i] = B->rmap->range[i]/bs;
1961 }
1962 b->rstartbs = B->rmap->rstart/bs;
1963 b->rendbs = B->rmap->rend/bs;
1964
1965 b->cstartbs = B->cmap->rstart/bs;
1966 b->cendbs = B->cmap->rend/bs;
1967
1968 #if defined(PETSC_USE_CTABLE)
1969 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
1970 #else
1971 ierr = PetscFree(b->colmap);CHKERRQ(ierr);
1972 #endif
1973 ierr = PetscFree(b->garray);CHKERRQ(ierr);
1974 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
1975 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
1976 ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
1977 ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
1978 ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
1979 ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
1980 ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
1981 ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
1982
1983 /* Because the B will have been resized we simply destroy it and create a new one each time */
1984 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
1985 ierr = MatDestroy(&b->B);CHKERRQ(ierr);
1986 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1987 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
1988 ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1989 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1990
1991 if (!B->preallocated) {
1992 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1993 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1994 ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1995 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1996 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1997 }
1998
1999 ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2000 ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2001
2002 B->preallocated = PETSC_TRUE;
2003 B->was_assembled = PETSC_FALSE;
2004 B->assembled = PETSC_FALSE;
2005 PetscFunctionReturn(0);
2006 }
2007
/*
   MatMPISBAIJSetPreallocationCSR_MPISBAIJ - Preallocates and fills an
   MPISBAIJ matrix from local block-CSR arrays.

   ii[0..m] are block-row offsets into jj (so ii[0] must be 0); jj holds
   global block-column indices; V optionally holds the bs*bs values of each
   block.  Assumes the column indices within each row are sorted ascending
   (as in the other *SetPreallocationCSR routines) — TODO confirm; an
   unsorted row would mis-count the diagonal/off-diagonal split below.
*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cend;
  PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    =NULL;
  PetscScalar    *values=NULL;
  PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* local sizes in block units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ  = jj + ii[i];
    bd  = 0;
    /* bd: entries strictly below the diagonal; these are not stored by SBAIJ */
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0;
    /* d: entries in the diagonal portion (block column < cend), at or above the diagonal */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;   /* remainder goes to the off-diagonal portion */
    nz       = nz - bd;       /* stored entries in this row (lower triangle excluded) */
    nz_max   = PetscMax(nz_max,nz);
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  /* the CSR input may contain lower-triangular entries; drop them silently */
  ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros so the nonzero pattern is still created */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row   = i + rstart;
    PetscInt          ncols = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                               /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* all entries were local by construction; skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2086
2087 /*MC
2088 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2089 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
2090 the matrix is stored.
2091
2092 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2093 can call MatSetOption(Mat, MAT_HERMITIAN);
2094
2095 Options Database Keys:
2096 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2097
2098 Notes:
   The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
   diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2101
2102 Level: beginner
2103
2104 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2105 M*/
2106
/*
   MatCreate_MPISBAIJ - Constructor for the MATMPISBAIJ type.

   Installs the MatOps function table on B, allocates and zero-initializes
   the Mat_MPISBAIJ context, composes the type-specific methods and
   conversions, marks the matrix symmetric, and processes the
   -mat_use_hash_table option.  Preallocation is deferred to
   MatMPISBAIJSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  /* type-specific methods reachable via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);

  /* by construction an SBAIJ matrix is (structurally) symmetric, permanently;
     in the complex case it is symmetric but not Hermitian by default */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian                  = PETSC_FALSE;
  B->hermitian_set              = PETSC_FALSE;
#else
  B->hermitian                  = PETSC_TRUE;
  B->hermitian_set              = PETSC_TRUE;
#endif

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2211
2212 /*MC
2213 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2214
2215 This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2216 and MATMPISBAIJ otherwise.
2217
2218 Options Database Keys:
2219 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2220
2221 Level: beginner
2222
.seealso: MatCreateSBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
2224 M*/
2225
2226 /*@C
2227 MatMPISBAIJSetPreallocation - For good matrix assembly performance
2228 the user should preallocate the matrix storage by setting the parameters
2229 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2230 performance can be increased by more than a factor of 50.
2231
2232 Collective on Mat
2233
2234 Input Parameters:
2235 + B - the matrix
2236 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2237 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2238 . d_nz - number of block nonzeros per block row in diagonal portion of local
2239 submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
2244 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2245 submatrix (same for all local rows).
2246 - o_nnz - array containing the number of nonzeros in the various block rows of the
2247 off-diagonal portion of the local submatrix that is right of the diagonal
2248 (possibly different for each block row) or NULL.
2249
2250
2251 Options Database Keys:
2252 + -mat_no_unroll - uses code that does not unroll the loops in the
2253 block calculations (much slower)
2254 - -mat_block_size - size of the blocks to use
2255
2256 Notes:
2257
2258 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2259 than it must be used on all processors that share the object for that argument.
2260
2261 If the *_nnz parameter is given then the *_nz parameter is ignored
2262
2263 Storage Information:
2264 For a square global matrix we define each processor's diagonal portion
2265 to be its local rows and the corresponding columns (a square submatrix);
2266 each processor's off-diagonal portion encompasses the remainder of the
2267 local matrix (a rectangular submatrix).
2268
2269 The user can specify preallocated storage for the diagonal part of
2270 the local submatrix with either d_nz or d_nnz (not both). Set
2271 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2272 memory allocation. Likewise, specify preallocated storage for the
2273 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2274
2275 You can call MatGetInfo() to get information on how effective the preallocation was;
2276 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2277 You can also run with the option -info and look for messages with the string
2278 malloc in them to see if additional memory allocation was needed.
2279
2280 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2281 the figure below we depict these three local rows and all columns (0-11).
2282
2283 .vb
2284 0 1 2 3 4 5 6 7 8 9 10 11
2285 --------------------------
2286 row 3 |. . . d d d o o o o o o
2287 row 4 |. . . d d d o o o o o o
2288 row 5 |. . . d d d o o o o o o
2289 --------------------------
2290 .ve
2291
2292 Thus, any entries in the d locations are stored in the d (diagonal)
2293 submatrix, and any entries in the o locations are stored in the
2294 o (off-diagonal) submatrix. Note that the d matrix is stored in
2295 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2296
2297 Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2298 plus the diagonal part of the d matrix,
2299 and o_nz should indicate the number of block nonzeros per row in the o matrix
2300
2301 In general, for PDE problems in which most nonzeros are near the diagonal,
2302 one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2303 or you will get TERRIBLE performance; see the users' manual chapter on
2304 matrices.
2305
2306 Level: intermediate
2307
2308 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2309 @*/
/* Public entry point: validates the arguments (bs must be collectively the
   same) and dispatches to the type-specific implementation composed under
   "MatMPISBAIJSetPreallocation_C" — a no-op for types that do not provide it */
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2321
2322 /*@C
2323 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2324 (block compressed row). For good matrix assembly performance
2325 the user should preallocate the matrix storage by setting the parameters
2326 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2327 performance can be increased by more than a factor of 50.
2328
2329 Collective
2330
2331 Input Parameters:
2332 + comm - MPI communicator
2333 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2334 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2335 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2336 This value should be the same as the local size used in creating the
2337 y vector for the matrix-vector product y = Ax.
2338 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2339 This value should be the same as the local size used in creating the
2340 x vector for the matrix-vector product y = Ax.
2341 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2342 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2343 . d_nz - number of block nonzeros per block row in diagonal portion of local
2344 submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
2350 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2351 submatrix (same for all local rows).
2352 - o_nnz - array containing the number of nonzeros in the various block rows of the
2353 off-diagonal portion of the local submatrix (possibly different for
2354 each block row) or NULL.
2355
2356 Output Parameter:
2357 . A - the matrix
2358
2359 Options Database Keys:
2360 + -mat_no_unroll - uses code that does not unroll the loops in the
2361 block calculations (much slower)
2362 . -mat_block_size - size of the blocks to use
2363 - -mat_mpi - use the parallel matrix data structures even on one processor
2364 (defaults to using SeqBAIJ format on one processor)
2365
2366 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2367 MatXXXXSetPreallocation() paradigm instead of this routine directly.
2368 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2369
2370 Notes:
2371 The number of rows and columns must be divisible by blocksize.
2372 This matrix type does not support complex Hermitian operation.
2373
2374 The user MUST specify either the local or global matrix dimensions
2375 (possibly both).
2376
2377 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2378 than it must be used on all processors that share the object for that argument.
2379
2380 If the *_nnz parameter is given then the *_nz parameter is ignored
2381
2382 Storage Information:
2383 For a square global matrix we define each processor's diagonal portion
2384 to be its local rows and the corresponding columns (a square submatrix);
2385 each processor's off-diagonal portion encompasses the remainder of the
2386 local matrix (a rectangular submatrix).
2387
2388 The user can specify preallocated storage for the diagonal part of
2389 the local submatrix with either d_nz or d_nnz (not both). Set
2390 d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2391 memory allocation. Likewise, specify preallocated storage for the
2392 off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2393
2394 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2395 the figure below we depict these three local rows and all columns (0-11).
2396
2397 .vb
2398 0 1 2 3 4 5 6 7 8 9 10 11
2399 --------------------------
2400 row 3 |. . . d d d o o o o o o
2401 row 4 |. . . d d d o o o o o o
2402 row 5 |. . . d d d o o o o o o
2403 --------------------------
2404 .ve
2405
2406 Thus, any entries in the d locations are stored in the d (diagonal)
2407 submatrix, and any entries in the o locations are stored in the
2408 o (off-diagonal) submatrix. Note that the d matrix is stored in
2409 MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2410
2411 Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2412 plus the diagonal part of the d matrix,
2413 and o_nz should indicate the number of block nonzeros per row in the o matrix.
2414 In general, for PDE problems in which most nonzeros are near the diagonal,
2415 one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2416 or you will get TERRIBLE performance; see the users' manual chapter on
2417 matrices.
2418
2419 Level: intermediate
2420
2421 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2422 @*/
2423
MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat * A)2424 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2425 {
2426 PetscErrorCode ierr;
2427 PetscMPIInt size;
2428
2429 PetscFunctionBegin;
2430 ierr = MatCreate(comm,A);CHKERRQ(ierr);
2431 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2432 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2433 if (size > 1) {
2434 ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2435 ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2436 } else {
2437 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2438 ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2439 }
2440 PetscFunctionReturn(0);
2441 }
2442
2443
/*
   MatDuplicate_MPISBAIJ - duplicates a parallel symmetric block AIJ matrix.

   Creates a new MPISBAIJ matrix with the same layout as matin, copies the
   scalar bookkeeping fields from the old data structure, duplicates the
   communication vectors and the local A (diagonal) and B (off-diagonal)
   blocks, and rebuilds the aliased sub-vectors slvec0b/slvec1a/slvec1b that
   share storage with slvec0/slvec1.

   Input Parameters:
+  matin    - matrix to duplicate
.  cpvalues - MAT_COPY_VALUES or MAT_DO_NOT_COPY_VALUES (forwarded to the
              MatDuplicate calls on the A and B blocks)
-  newmat   - location for the new matrix
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = NULL;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* share the layout objects rather than recomputing them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy scalar bookkeeping from the old data structure */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance scratch state starts empty; it is rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration flags, reset the table itself */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;

  /* copy the local-to-global column map of the off-diagonal block, if any */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = NULL;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b/slvec0b are views into the new slvec1/slvec0 storage:
     the first bs*mbs entries hold the "a" (local) part, the rest the "b" part */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  /* share the scatter context by reference instead of copying it */
  ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* duplicate the local blocks; cpvalues decides whether values are copied */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2550
2551 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2552 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2553
MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)2554 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2555 {
2556 PetscErrorCode ierr;
2557 PetscBool isbinary;
2558
2559 PetscFunctionBegin;
2560 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2561 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2562 ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
2563 PetscFunctionReturn(0);
2564 }
2565
2566 /*XXXXX@
2567 MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2568
   Input Parameters:
+  mat  - the matrix
-  fact - factor
2572
2573 Not Collective on Mat, each process can have a different hash factor
2574
2575 Level: advanced
2576
2577 Notes:
2578 This can also be set by the command line option: -mat_use_hash_table fact
2579
2580 .seealso: MatSetOption()
2581 @XXXXX*/
2582
2583
/*
   MatGetRowMaxAbs_MPISBAIJ - computes, for each local row, the maximum
   absolute value over that row of the (symmetric) matrix.

   Because only the upper triangle is stored, the contribution of an
   off-diagonal entry (i,j) to row j lives on the process owning row i.
   Each process therefore accumulates column maxima of its B (off-diagonal)
   block into a global-length work array and ships the relevant slices to
   the higher-ranked owners with raw MPI_Send/MPI_Recv.

   Input Parameters:
+  A   - the matrix
.  v   - vector receiving the per-row maxima (local part)
-  idx - must be NULL; returning column indices is not implemented
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* row maxima of the diagonal block A go straight into v */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;   /* raw block values of B, walked linearly below */
  bi  = b->i;   /* block-row pointers of B */
  bj  = b->j;   /* block-column indices of B */

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed; indexed by GLOBAL column */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col = bcol + kcol;               /* local col index */
          /* NOTE(review): offsetting by rowners_bs[rank+1] assumes the
             stored bj indices enumerate columns past this rank's range;
             verify against how the B block/garray is laid out here */
          col += rowners_bs[rank+1];       /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;            /* local row index */
            /* update this row's max, and the symmetric column contribution */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners: each later rank gets its slice of work[] */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values: one message from every lower rank, merged into va */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2665
/*
   MatSOR_MPISBAIJ - SOR/SSOR relaxation for parallel symmetric block AIJ
   matrices (only block size 1 is supported).

   Supported modes: SOR_APPLY_UPPER, local symmetric sweeps (with the
   off-process coupling folded into the right-hand side each iteration),
   single local forward/backward sweeps with zero initial guess, and the
   Eisenstat trick.  All other MatSORType combinations error out.

   Input Parameters:
+  matin  - the matrix
.  bb     - right-hand side
.  omega  - relaxation factor
.  flag   - MatSORType flags
.  fshift - diagonal shift
.  its    - number of global iterations
.  lits   - number of local iterations
-  xx     - solution vector (in/out)
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate entirely to the local diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no off-process contribution when x starts at 0 */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate: the scatter below ADDs, so the coupling is subtracted from bb */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = B*slvec1b + slvec1a (coupling-adjusted rhs) */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    /* Eisenstat's trick: split preconditioning with the SSOR factors */
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily created work vectors, cached on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale = -1 when omega == 1, so fuse into a subtraction */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2797
2798 /*@
   MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
   in standard CSR format.
2801
2802 Collective
2803
2804 Input Parameters:
2805 + comm - MPI communicator
2806 . bs - the block size, only a block size of 1 is supported
2807 . m - number of local rows (Cannot be PETSC_DECIDE)
2808 . n - This value should be the same as the local size used in creating the
2809 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2810 calculated if N is given) For square matrices n is almost always m.
2811 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2812 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2813 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2814 . j - column indices
2815 - a - matrix values
2816
2817 Output Parameter:
2818 . mat - the matrix
2819
2820 Level: intermediate
2821
2822 Notes:
2823 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2824 thus you CANNOT change the matrix entries by changing the values of a[] after you have
2825 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2826
2827 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2828
2829 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2830 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2831 @*/
MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat * mat)2832 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2833 {
2834 PetscErrorCode ierr;
2835
2836
2837 PetscFunctionBegin;
2838 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2839 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2840 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2841 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2842 ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2843 ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2844 PetscFunctionReturn(0);
2845 }
2846
2847
2848 /*@C
2849 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2850
2851 Collective
2852
2853 Input Parameters:
2854 + B - the matrix
2855 . bs - the block size
2856 . i - the indices into j for the start of each local row (starts with zero)
2857 . j - the column indices for each local row (starts with zero) these must be sorted for each row
2858 - v - optional values in the matrix
2859
2860 Level: advanced
2861
2862 Notes:
2863 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2864 and usually the numerical values as well
2865
2866 Any entries below the diagonal are ignored
2867
2868 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2869 @*/
/* Trampoline: forwards to the type-specific implementation registered under
   "MatMPISBAIJSetPreallocationCSR_C"; a no-op if the type does not provide one. */
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2878
MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)2879 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2880 {
2881 PetscErrorCode ierr;
2882 PetscInt m,N,i,rstart,nnz,Ii,bs,cbs;
2883 PetscInt *indx;
2884 PetscScalar *values;
2885
2886 PetscFunctionBegin;
2887 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2888 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2889 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
2890 PetscInt *dnz,*onz,mbs,Nbs,nbs;
2891 PetscInt *bindx,rmax=a->rmax,j;
2892 PetscMPIInt rank,size;
2893
2894 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2895 mbs = m/bs; Nbs = N/cbs;
2896 if (n == PETSC_DECIDE) {
2897 ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2898 }
2899 nbs = n/cbs;
2900
2901 ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2902 ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2903
2904 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2905 ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
2906 if (rank == size-1) {
2907 /* Check sum(nbs) = Nbs */
2908 if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2909 }
2910
2911 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2912 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2913 for (i=0; i<mbs; i++) {
2914 ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2915 nnz = nnz/bs;
2916 for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2917 ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2918 ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2919 }
2920 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2921 ierr = PetscFree(bindx);CHKERRQ(ierr);
2922
2923 ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2924 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2925 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2926 ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
2927 ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
2928 ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2929 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2930 }
2931
2932 /* numeric phase */
2933 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2934 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2935
2936 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2937 for (i=0; i<m; i++) {
2938 ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2939 Ii = i + rstart;
2940 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2941 ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2942 }
2943 ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2944 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2945 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2946 PetscFunctionReturn(0);
2947 }
2948