#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs;
  PetscScalar       *va,*vv;
  Vec               vB,vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);

  ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
  ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs))
        idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs);
    }
  }
  ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr); /* restore vv against v, the vector it was obtained from */
  ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vA);CHKERRQ(ierr);
  ierr = VecDestroy(&vB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

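/*
   A minimal sketch (illustrative only, not part of PETSc) of how the colmap
   built above is consulted: translate a global column gcol into the local
   column index used for the off-diagonal part B, or -1 if the owning block
   column is not present on this process. The helper name is hypothetical.
*/
PETSC_UNUSED static PetscErrorCode MatColmapLookup_Sketch(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscInt    bs    = mat->rmap->bs,col;
#if defined(PETSC_USE_CTABLE)
  PetscErrorCode ierr;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* keys and values are stored shifted by one so that 0 can signal "absent" */
  ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&col);CHKERRQ(ierr);
  col--;
#else
  col = baij->colmap[gcol/bs] - 1;
#endif
  *lcol = (col < 0) ? -1 : col + gcol % bs;
  PetscFunctionReturn(0);
}
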
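/*
   The two macros below insert one scalar at (row,col) of the local diagonal
   (A) respectively off-diagonal (B) SeqBAIJ part: the block column is located
   by a truncated binary search followed by a linear scan; an existing bs x bs
   block is updated in place, otherwise the later entries of the block row are
   shifted up and a freshly zeroed block is inserted.
*/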
#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs; \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikely(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ*)(B)->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2*ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap + bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If only one block column (row-oriented) or one block row (column-oriented) is passed, a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
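
/*
   A minimal usage sketch (illustrative only; assumes bs = 2 and an MPIBAIJ
   matrix mat; the values are arbitrary). With the default row-oriented
   ordering the bs*bs entries of each block are passed row by row:
*/
#if 0
  {
    PetscInt    row     = 0,col = 1;   /* block row/column indices */
    PetscScalar vals[4] = {1.0,2.0,    /* first row of the block   */
                           3.0,4.0};   /* second row of the block  */
    ierr = MatSetValuesBlocked(mat,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
#endif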

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
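/*
   Worked example of the multiplicative hash above (values rounded): for
   key = 100 and size = 50, 100*0.6180339887 = 61.8034, whose fractional part
   is 0.8034, and 50*0.8034 = 40.17, so the key hashes to slot 40. Collisions
   are resolved by linear probing with wraparound in the routines below.
*/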
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else             stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
      if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
    }
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == size) {
              for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
              if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and populates the hash table. The table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
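
/*
   Sketch (illustrative only, not part of PETSc) of the lookup that the
   hash-table MatSetValues variants perform against the table built above:
   hash the (block row, block column) key, then probe forward with wraparound
   until the key is found; HD[] then points directly at the bs2 values of the
   corresponding block.
*/
PETSC_UNUSED static MatScalar *MatHashLookup_Sketch(Mat_MPIBAIJ *baij,PetscInt brow,PetscInt bcol)
{
  PetscInt  key = brow*baij->Nbs + bcol + 1,size = baij->ht_size,idx,h1;
  PetscReal tmp;

  h1 = HASH(size,key,tmp);
  for (idx=h1; idx<size; idx++) if (baij->ht[idx] == key) return baij->hd[idx];
  for (idx=0; idx<h1; idx++)    if (baij->ht[idx] == key) return baij->hd[idx];
  return NULL; /* no entry for this block */
}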

PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ*)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
  PetscInt       *rowlens,*colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs*bs*(A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++)
    for (j=0; j<bs; j++)
      rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*A->j[ja] + l + cs;
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          colidxs[cnt++] = bs*garray[B->j[jb]] + l;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<A->mbs; i++) {
    for (k=0; k<bs; k++) {
      for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
        if (garray[B->j[jb]] > cs/bs) break;
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
      }
      for (ja=A->i[i]; ja<A->i[i+1]; ja++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
      for (; jb<B->i[i+1]; jb++)
        for (l=0; l<bs; l++)
          matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
    }
  }
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = NULL; pvB = NULL;}
  if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v)   *v   = NULL;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat * matout)1527 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1528 {
1529 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1530 Mat_SeqBAIJ *Aloc;
1531 Mat B;
1532 PetscErrorCode ierr;
1533 PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1534 PetscInt bs=A->rmap->bs,mbs=baij->mbs;
1535 MatScalar *a;
1536
1537 PetscFunctionBegin;
1538 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1539 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1540 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1541 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1542 /* Do not know preallocation information, but must set block size */
1543 ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1544 } else {
1545 B = *matout;
1546 }
1547
1548 /* copy over the A part */
1549 Aloc = (Mat_SeqBAIJ*)baij->A->data;
1550 ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1551 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1552
1553 for (i=0; i<mbs; i++) {
1554 rvals[0] = bs*(baij->rstartbs + i);
1555 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1556 for (j=ai[i]; j<ai[i+1]; j++) {
1557 col = (baij->cstartbs+aj[j])*bs;
1558 for (k=0; k<bs; k++) {
1559 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1560
1561 col++; a += bs;
1562 }
1563 }
1564 }
1565 /* copy over the B part */
1566 Aloc = (Mat_SeqBAIJ*)baij->B->data;
1567 ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1568 for (i=0; i<mbs; i++) {
1569 rvals[0] = bs*(baij->rstartbs + i);
1570 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1571 for (j=ai[i]; j<ai[i+1]; j++) {
1572 col = baij->garray[aj[j]]*bs;
1573 for (k=0; k<bs; k++) {
1574 ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1575 col++;
1576 a += bs;
1577 }
1578 }
1579 }
1580 ierr = PetscFree(rvals);CHKERRQ(ierr);
1581 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1582 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1583
1584 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
1585 else {
1586 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1587 }
1588 PetscFunctionReturn(0);
1589 }
1590
MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)1591 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1592 {
1593 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1594 Mat a = baij->A,b = baij->B;
1595 PetscErrorCode ierr;
1596 PetscInt s1,s2,s3;
1597
1598 PetscFunctionBegin;
1599 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1600 if (rr) {
1601 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1602 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1603 /* Overlap communication with computation. */
1604 ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1605 }
1606 if (ll) {
1607 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1608 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1609 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1610 }
1611 /* scale the diagonal block */
1612 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1613
1614 if (rr) {
1615 /* Do a scatter end and then right scale the off-diagonal block */
1616 ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1617 ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1618 }
1619 PetscFunctionReturn(0);
1620 }
1621
MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)1622 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1623 {
1624 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1625 PetscInt *lrows;
1626 PetscInt r, len;
1627 PetscBool cong;
1628 PetscErrorCode ierr;
1629
1630 PetscFunctionBegin;
1631 /* get locally owned rows */
1632 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
1633 /* fix right hand side if needed */
1634 if (x && b) {
1635 const PetscScalar *xx;
1636 PetscScalar *bb;
1637
1638 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1639 ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1640 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1641 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1642 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1643 }
1644
1645 /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory each time, triggering many mallocs
     and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt          bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs+1] - baij->i[row/bs])*bs;
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
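    /* aa points at entry (row%bs,0) of the first block in block row row/bs (block entries are
       stored column-major), so stepping by bs zeroes that row's entry in every stored column */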
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);

PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
  Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
  Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
  Not great since it makes two copies of the submatrix: first a SeqBAIJ
  locally, then the final result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided: it does not flag
     errors and may change the indices, hence buggy */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       clocal_size,nrows;
  const PetscInt *rows;
  PetscMPIInt    size;
  IS             crowp,lcolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
  /* make a local version of 'colp' */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
  if (size==1) {
    lcolp = colp;
  } else {
    ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  /* now we just get the submatrix */
  ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  if (pcomm!=comm) {
    ierr = ISDestroy(&crowp);CHKERRQ(ierr);
  }
  if (size>1) {
    ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts)  *ghosts  = baij->garray;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = NULL,*displs = NULL;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
       Tell every processor the number of nonzeros per row
  */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  ierr   = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs = recvcounts + size;
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
       Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note: numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

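  /* a work vector is needed unless a single sweep with a zero initial guess was requested;
     (~flag & SOR_ZERO_INITIAL_GUESS) is nonzero exactly when that bit of flag is not set */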
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
  PetscInt       N,i,*garray = aij->garray;
  PetscInt       ib,jb,bs = A->rmap->bs;
  Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
  MatScalar      *a_val = a_aij->a;
  Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
  MatScalar      *b_val = b_aij->a;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
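  /* accumulate per global column over both the diagonal (A) and off-diagonal (B) blocks:
     squared magnitudes for NORM_2, magnitudes for NORM_1, the maximum magnitude for NORM_INFINITY */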
  if (type == NORM_2) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
      for (jb=0; jb<bs; jb++) {
        for (ib=0; ib<bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype             = a->A->factorerrortype;
  A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
  Mat_SeqBAIJ    *aij  = (Mat_SeqBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
  ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
  if (d) {
    PetscInt rstart;
    ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
    *d  += rstart/A->rmap->bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                               /* 4*/  MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*10*/  NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                               /*15*/  MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                               /*20*/  MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                               /*24*/  MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*29*/  MatSetUp_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                               /*34*/  MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*39*/  MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                               /*44*/  NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                               /*49*/  NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*54*/  MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                               /*59*/  MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*64*/  NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*69*/  MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*74*/  NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*79*/  NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                               /*84*/  NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*89*/  NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*94*/  NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*99*/  NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                               /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                               /*124*/ NULL,
                                       MatGetColumnNorms_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                               /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);

PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    =NULL;
  PetscScalar    *values=NULL;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
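  /* when V is not provided, insert explicit zeros so the exact nonzero pattern is still set */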
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
   The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
   may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
   over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
   MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
   block column and the second index is over columns within a block.

   Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

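   As an illustrative sketch: on a single process, with bs=2 and two local block rows, a
   block-diagonal matrix (block row 0 holding block column 0, block row 1 holding block
   column 1) could be set with the default MAT_ROW_ORIENTED=PETSC_TRUE as
.vb
   PetscInt    i[] = {0,1,2};            /* start of each block row in j[] */
   PetscInt    j[] = {0,1};              /* block column indices */
   PetscScalar v[] = {1,2,3,4, 5,6,7,8}; /* two 2x2 blocks, row oriented */
   ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
.ve
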
.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because b->B may have been resized, we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* reduce the count by one if this row contains the diagonal entry, which MPIAdj does not store */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--; break;}
    }
  }
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
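  /* merge each row in ascending global order: off-process columns before this process's
     block rows, then local columns without the diagonal entry, then the remaining
     off-process columns */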
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
. -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table during matrix assembly, with expansion factor <fact>

   Level: beginner

   Notes:
    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: MatCreateBAIJ
M*/

2889 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2890
MatCreate_MPIBAIJ(Mat B)2891 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2892 {
2893 Mat_MPIBAIJ *b;
2894 PetscErrorCode ierr;
2895 PetscBool flg = PETSC_FALSE;
2896
2897 PetscFunctionBegin;
2898 ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
2899 B->data = (void*)b;
2900
2901 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2902 B->assembled = PETSC_FALSE;
2903
2904 B->insertmode = NOT_SET_VALUES;
2905 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2906 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2907
2908 /* build local table of row and column ownerships */
2909 ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
2910
2911 /* build cache for off array entries formed */
2912 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2913
2914 b->donotstash = PETSC_FALSE;
2915 b->colmap = NULL;
2916 b->garray = NULL;
2917 b->roworiented = PETSC_TRUE;
2918
2919 /* stuff used in block assembly */
2920 b->barray = NULL;
2921
2922 /* stuff used for matrix vector multiply */
2923 b->lvec = NULL;
2924 b->Mvctx = NULL;
2925
2926 /* stuff for MatGetRow() */
2927 b->rowindices = NULL;
2928 b->rowvalues = NULL;
2929 b->getrowactive = PETSC_FALSE;
2930
2931 /* hash table stuff */
2932 b->ht = NULL;
2933 b->hd = NULL;
2934 b->ht_size = 0;
2935 b->ht_flag = PETSC_FALSE;
2936 b->ht_fact = 0;
2937 b->ht_total_ct = 0;
2938 b->ht_insert_ct = 0;
2939
2940 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2941 b->ijonly = PETSC_FALSE;
2942
2943
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
   and MATMPIBAIJ otherwise.

   Options Database Keys:
.  -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
          row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
          set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to speed up matrix assembly (fact is the hash table size factor)

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

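   Example (a minimal sketch; the sizes and preallocation counts are illustrative and error checking is omitted):
.vb
   Mat      B;
   PetscInt bs = 2, mlocal = 10;   /* mlocal must be divisible by bs */

   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(B,bs,3,NULL,1,NULL);
   /* ... MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
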
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
          row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
          and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to speed up matrix assembly (fact is the hash table size factor)

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

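   Example (a minimal sketch; the sizes and preallocation counts are illustrative and error checking is omitted):
.vb
   Mat      A;
   PetscInt bs = 2, mlocal = 10;   /* mlocal must be divisible by bs */

   MatCreateBAIJ(PETSC_COMM_WORLD,bs,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,1,NULL,&A);
.ve
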
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = NULL;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
  PetscInt       *rowidxs,*colidxs,rs,cs,ce;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  /* read in matrix header */
  ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
  if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
  M = header[1]; N = header[2]; nz = header[3];
  if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
  if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
  if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);

  /* check if the matrix sizes are correct */
  ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
  if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr);
  ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr);
  mbs = m/bs; nbs = n/bs;

  /* read in row lengths and build row indices */
  ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
  rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
  ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
  if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);

  /* read in column indices and matrix values */
  ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);

  { /* preallocate matrix storage */
    PetscBT    bt;   /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht;   /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij,done;
    PetscInt   *d_nnz,*o_nnz;

    ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
    ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
      ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
      for (k=0; k<bs; k++) {
        PetscInt row = bs*i + k;
        for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
            } else {
              ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
    ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
    /* both preallocations are attempted; each uses PetscTryMethod() internally, so only the call matching the matrix type takes effect */
    ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
  }

  /* store matrix values */
  for (i=0; i<m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
    ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = PetscFree(rowidxs);CHKERRQ(ierr);
  ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
  ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Not Collective; each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

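   Example (a minimal sketch; the factor value is illustrative):
.vb
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(mat,1.6);
.ve
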
.seealso: MatSetOption()
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

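/*
   MatMPIBAIJGetSeqBAIJ - gives access to the local (sequential) parts of a MATMPIBAIJ matrix:
   Ad is the diagonal block, Ao the off-diagonal block, and colmap maps the block columns of Ao
   to global block column numbers (the garray of the Mat_MPIBAIJ structure); compare the
   analogous AIJ routine MatMPIAIJGetSeqAIJ().
*/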
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()

   Collective on Mat

   Input Parameters:
+  mat    - the matrix
.  min    - number of input rows
.  im     - input rows
.  nin    - number of input columns
.  in     - input columns
.  v      - numerical values input
-  addvin - INSERT_VALUES or ADD_VALUES

   Notes:
   This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.

   Level: advanced

.seealso: MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  if (!barray) {
    ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
   in standard block CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row row-1 of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, etc. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.

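   Example (a minimal sketch for bs = 1 with two local rows; the values are illustrative and error checking is omitted):
.vb
   PetscInt    i[] = {0,2,3};         /* row 0 has 2 block entries, row 1 has 1 */
   PetscInt    j[] = {0,1,1};         /* global block column indices */
   PetscScalar a[] = {1.0,2.0,3.0};   /* bs*bs values per block entry */
   Mat         A;

   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve
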
.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data;
    PetscInt    *dnz,*onz,mbs,Nbs,nbs;
    PetscInt    *bindx,rmax=a->rmax,j;
    PetscMPIInt rank,size;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr);
    }
    nbs = n/cbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */

    ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    if (rank == size-1) {
      /* Check sum(nbs) = Nbs */
      if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}