1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/vecscatterimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8
9 /*MC
10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11
12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13 and MATMPIAIJ otherwise. As a result, for single process communicators,
14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported
15 for communicators controlling multiple processes. It is recommended that you call both of
16 the above preallocation routines for simplicity.
17
18 Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20
21 Developer Notes:
22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
23 enough exist.
24
25 Level: beginner
26
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29
30 /*MC
31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32
33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34 and MATMPIAIJCRL otherwise. As a result, for single process communicators,
35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36 for communicators controlling multiple processes. It is recommended that you call both of
37 the above preallocation routines for simplicity.
38
39 Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41
42 Level: beginner
43
44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46
MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
50 PetscErrorCode ierr;
51
52 PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54 A->boundtocpu = flg;
55 #endif
56 if (a->A) {
57 ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58 }
59 if (a->B) {
60 ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61 }
62 PetscFunctionReturn(0);
63 }
64
65
MatSetBlockSizes_MPIAIJ(Mat M,PetscInt rbs,PetscInt cbs)66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68 PetscErrorCode ierr;
69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
70
71 PetscFunctionBegin;
72 if (mat->A) {
73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75 }
76 PetscFunctionReturn(0);
77 }
78
MatFindNonzeroRows_MPIAIJ(Mat M,IS * keptrows)79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81 PetscErrorCode ierr;
82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data;
83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data;
84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data;
85 const PetscInt *ia,*ib;
86 const MatScalar *aa,*bb;
87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows;
88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart;
89
90 PetscFunctionBegin;
91 *keptrows = NULL;
92 ia = a->i;
93 ib = b->i;
94 for (i=0; i<m; i++) {
95 na = ia[i+1] - ia[i];
96 nb = ib[i+1] - ib[i];
97 if (!na && !nb) {
98 cnt++;
99 goto ok1;
100 }
101 aa = a->a + ia[i];
102 for (j=0; j<na; j++) {
103 if (aa[j] != 0.0) goto ok1;
104 }
105 bb = b->a + ib[i];
106 for (j=0; j <nb; j++) {
107 if (bb[j] != 0.0) goto ok1;
108 }
109 cnt++;
110 ok1:;
111 }
112 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
113 if (!n0rows) PetscFunctionReturn(0);
114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
115 cnt = 0;
116 for (i=0; i<m; i++) {
117 na = ia[i+1] - ia[i];
118 nb = ib[i+1] - ib[i];
119 if (!na && !nb) continue;
120 aa = a->a + ia[i];
121 for (j=0; j<na;j++) {
122 if (aa[j] != 0.0) {
123 rows[cnt++] = rstart + i;
124 goto ok2;
125 }
126 }
127 bb = b->a + ib[i];
128 for (j=0; j<nb; j++) {
129 if (bb[j] != 0.0) {
130 rows[cnt++] = rstart + i;
131 goto ok2;
132 }
133 }
134 ok2:;
135 }
136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
137 PetscFunctionReturn(0);
138 }
139
MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
141 {
142 PetscErrorCode ierr;
143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data;
144 PetscBool cong;
145
146 PetscFunctionBegin;
147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
148 if (Y->assembled && cong) {
149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
150 } else {
151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
152 }
153 PetscFunctionReturn(0);
154 }
155
MatFindZeroDiagonals_MPIAIJ(Mat M,IS * zrows)156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
157 {
158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data;
159 PetscErrorCode ierr;
160 PetscInt i,rstart,nrows,*rows;
161
162 PetscFunctionBegin;
163 *zrows = NULL;
164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
166 for (i=0; i<nrows; i++) rows[i] += rstart;
167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
168 PetscFunctionReturn(0);
169 }
170
MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal * norms)171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
172 {
173 PetscErrorCode ierr;
174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data;
175 PetscInt i,n,*garray = aij->garray;
176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data;
177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data;
178 PetscReal *work;
179
180 PetscFunctionBegin;
181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
183 if (type == NORM_2) {
184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
186 }
187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
189 }
190 } else if (type == NORM_1) {
191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
193 }
194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
196 }
197 } else if (type == NORM_INFINITY) {
198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
200 }
201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
203 }
204
205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
206 if (type == NORM_INFINITY) {
207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
208 } else {
209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
210 }
211 ierr = PetscFree(work);CHKERRQ(ierr);
212 if (type == NORM_2) {
213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
214 }
215 PetscFunctionReturn(0);
216 }
217
MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS * is)218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
219 {
220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
221 IS sis,gis;
222 PetscErrorCode ierr;
223 const PetscInt *isis,*igis;
224 PetscInt n,*iis,nsis,ngis,rstart,i;
225
226 PetscFunctionBegin;
227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
233
234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
237 n = ngis + nsis;
238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
240 for (i=0; i<n; i++) iis[i] += rstart;
241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
242
243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
245 ierr = ISDestroy(&sis);CHKERRQ(ierr);
246 ierr = ISDestroy(&gis);CHKERRQ(ierr);
247 PetscFunctionReturn(0);
248 }
249
250 /*
251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from
252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
253
254 Only for square matrices
255
256 Used by a preconditioner, hence PETSC_EXTERN
257 */
MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat * inmat)258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
259 {
260 PetscMPIInt rank,size;
261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
262 PetscErrorCode ierr;
263 Mat mat;
264 Mat_SeqAIJ *gmata;
265 PetscMPIInt tag;
266 MPI_Status status;
267 PetscBool aij;
268 MatScalar *gmataa,*ao,*ad,*gmataarestore=NULL;
269
270 PetscFunctionBegin;
271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
273 if (!rank) {
274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
276 }
277 if (reuse == MAT_INITIAL_MATRIX) {
278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
287
288 rowners[0] = 0;
289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
290 rstart = rowners[rank];
291 rend = rowners[rank+1];
292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
293 if (!rank) {
294 gmata = (Mat_SeqAIJ*) gmat->data;
295 /* send row lengths to all processors */
296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
297 for (i=1; i<size; i++) {
298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
299 }
300 /* determine number diagonal and off-diagonal counts */
301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
303 jj = 0;
304 for (i=0; i<m; i++) {
305 for (j=0; j<dlens[i]; j++) {
306 if (gmata->j[jj] < rstart) ld[i]++;
307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
308 jj++;
309 }
310 }
311 /* send column indices to other processes */
312 for (i=1; i<size; i++) {
313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
316 }
317
318 /* send numerical values to other processes */
319 for (i=1; i<size; i++) {
320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
322 }
323 gmataa = gmata->a;
324 gmataj = gmata->j;
325
326 } else {
327 /* receive row lengths */
328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
329 /* receive column indices */
330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
333 /* determine number diagonal and off-diagonal counts */
334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
336 jj = 0;
337 for (i=0; i<m; i++) {
338 for (j=0; j<dlens[i]; j++) {
339 if (gmataj[jj] < rstart) ld[i]++;
340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
341 jj++;
342 }
343 }
344 /* receive numerical values */
345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
347 }
348 /* set preallocation */
349 for (i=0; i<m; i++) {
350 dlens[i] -= olens[i];
351 }
352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
354
355 for (i=0; i<m; i++) {
356 dlens[i] += olens[i];
357 }
358 cnt = 0;
359 for (i=0; i<m; i++) {
360 row = rstart + i;
361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
362 cnt += dlens[i];
363 }
364 if (rank) {
365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
366 }
367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
368 ierr = PetscFree(rowners);CHKERRQ(ierr);
369
370 ((Mat_MPIAIJ*)(mat->data))->ld = ld;
371
372 *inmat = mat;
373 } else { /* column indices are already set; only need to move over numerical values from process 0 */
374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
376 mat = *inmat;
377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
378 if (!rank) {
379 /* send numerical values to other processes */
380 gmata = (Mat_SeqAIJ*) gmat->data;
381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
382 gmataa = gmata->a;
383 for (i=1; i<size; i++) {
384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
386 }
387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
388 } else {
389 /* receive numerical values from process 0*/
390 nz = Ad->nz + Ao->nz;
391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
393 }
394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
395 ld = ((Mat_MPIAIJ*)(mat->data))->ld;
396 ad = Ad->a;
397 ao = Ao->a;
398 if (mat->rmap->n) {
399 i = 0;
400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
402 }
403 for (i=1; i<mat->rmap->n; i++) {
404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
406 }
407 i--;
408 if (mat->rmap->n) {
409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
410 }
411 if (rank) {
412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
413 }
414 }
415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
417 PetscFunctionReturn(0);
418 }
419
420 /*
421 Local utility routine that creates a mapping from the global column
422 number to the local number in the off-diagonal part of the local
423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
424 a slightly higher hash table cost; without it it is not scalable (each processor
425 has an order N integer array but is fast to acess.
426 */
MatCreateColmap_MPIAIJ_Private(Mat mat)427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
428 {
429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
430 PetscErrorCode ierr;
431 PetscInt n = aij->B->cmap->n,i;
432
433 PetscFunctionBegin;
434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
435 #if defined(PETSC_USE_CTABLE)
436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
437 for (i=0; i<n; i++) {
438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
439 }
440 #else
441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
444 #endif
445 PetscFunctionReturn(0);
446 }
447
448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
449 { \
450 if (col <= lastcol1) low1 = 0; \
451 else high1 = nrow1; \
452 lastcol1 = col;\
453 while (high1-low1 > 5) { \
454 t = (low1+high1)/2; \
455 if (rp1[t] > col) high1 = t; \
456 else low1 = t; \
457 } \
458 for (_i=low1; _i<high1; _i++) { \
459 if (rp1[_i] > col) break; \
460 if (rp1[_i] == col) { \
461 if (addv == ADD_VALUES) { \
462 ap1[_i] += value; \
463 /* Not sure LogFlops will slow dow the code or not */ \
464 (void)PetscLogFlops(1.0); \
465 } \
466 else ap1[_i] = value; \
467 inserted = PETSC_TRUE; \
468 goto a_noinsert; \
469 } \
470 } \
471 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
472 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
473 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
474 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
475 N = nrow1++ - 1; a->nz++; high1++; \
476 /* shift up all the later entries in this row */ \
477 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
478 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
479 rp1[_i] = col; \
480 ap1[_i] = value; \
481 A->nonzerostate++;\
482 a_noinsert: ; \
483 ailen[row] = nrow1; \
484 }
485
486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
487 { \
488 if (col <= lastcol2) low2 = 0; \
489 else high2 = nrow2; \
490 lastcol2 = col; \
491 while (high2-low2 > 5) { \
492 t = (low2+high2)/2; \
493 if (rp2[t] > col) high2 = t; \
494 else low2 = t; \
495 } \
496 for (_i=low2; _i<high2; _i++) { \
497 if (rp2[_i] > col) break; \
498 if (rp2[_i] == col) { \
499 if (addv == ADD_VALUES) { \
500 ap2[_i] += value; \
501 (void)PetscLogFlops(1.0); \
502 } \
503 else ap2[_i] = value; \
504 inserted = PETSC_TRUE; \
505 goto b_noinsert; \
506 } \
507 } \
508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
512 N = nrow2++ - 1; b->nz++; high2++; \
513 /* shift up all the later entries in this row */ \
514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
516 rp2[_i] = col; \
517 ap2[_i] = value; \
518 B->nonzerostate++; \
519 b_noinsert: ; \
520 bilen[row] = nrow2; \
521 }
522
MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
524 {
525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
527 PetscErrorCode ierr;
528 PetscInt l,*garray = mat->garray,diag;
529
530 PetscFunctionBegin;
531 /* code only works for square matrices A */
532
533 /* find size of row to the left of the diagonal part */
534 ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
535 row = row - diag;
536 for (l=0; l<b->i[row+1]-b->i[row]; l++) {
537 if (garray[b->j[b->i[row]+l]] > diag) break;
538 }
539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
540
541 /* diagonal part */
542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
543
544 /* right of diagonal part */
545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
546 #if defined(PETSC_HAVE_DEVICE)
547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
548 #endif
549 PetscFunctionReturn(0);
550 }
551
MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
553 {
554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
555 PetscScalar value = 0.0;
556 PetscErrorCode ierr;
557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
559 PetscBool roworiented = aij->roworiented;
560
561 /* Some Variables required in the macro */
562 Mat A = aij->A;
563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
565 MatScalar *aa = a->a;
566 PetscBool ignorezeroentries = a->ignorezeroentries;
567 Mat B = aij->B;
568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
570 MatScalar *ba = b->a;
571 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
572 * cannot use "#if defined" inside a macro. */
573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
574
575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
576 PetscInt nonew;
577 MatScalar *ap1,*ap2;
578
579 PetscFunctionBegin;
580 for (i=0; i<m; i++) {
581 if (im[i] < 0) continue;
582 if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
583 if (im[i] >= rstart && im[i] < rend) {
584 row = im[i] - rstart;
585 lastcol1 = -1;
586 rp1 = aj + ai[row];
587 ap1 = aa + ai[row];
588 rmax1 = aimax[row];
589 nrow1 = ailen[row];
590 low1 = 0;
591 high1 = nrow1;
592 lastcol2 = -1;
593 rp2 = bj + bi[row];
594 ap2 = ba + bi[row];
595 rmax2 = bimax[row];
596 nrow2 = bilen[row];
597 low2 = 0;
598 high2 = nrow2;
599
600 for (j=0; j<n; j++) {
601 if (v) value = roworiented ? v[i*n+j] : v[i+j*m];
602 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
603 if (in[j] >= cstart && in[j] < cend) {
604 col = in[j] - cstart;
605 nonew = a->nonew;
606 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
607 #if defined(PETSC_HAVE_DEVICE)
608 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
609 #endif
610 } else if (in[j] < 0) continue;
611 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
612 else {
613 if (mat->was_assembled) {
614 if (!aij->colmap) {
615 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
616 }
617 #if defined(PETSC_USE_CTABLE)
618 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
619 col--;
620 #else
621 col = aij->colmap[in[j]] - 1;
622 #endif
623 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
624 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
625 col = in[j];
626 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
627 B = aij->B;
628 b = (Mat_SeqAIJ*)B->data;
629 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
630 rp2 = bj + bi[row];
631 ap2 = ba + bi[row];
632 rmax2 = bimax[row];
633 nrow2 = bilen[row];
634 low2 = 0;
635 high2 = nrow2;
636 bm = aij->B->rmap->n;
637 ba = b->a;
638 inserted = PETSC_FALSE;
639 } else if (col < 0) {
640 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
641 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
642 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
643 }
644 } else col = in[j];
645 nonew = b->nonew;
646 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
647 #if defined(PETSC_HAVE_DEVICE)
648 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
649 #endif
650 }
651 }
652 } else {
653 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
654 if (!aij->donotstash) {
655 mat->assembled = PETSC_FALSE;
656 if (roworiented) {
657 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
658 } else {
659 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
660 }
661 }
662 }
663 }
664 PetscFunctionReturn(0);
665 }
666
667 /*
668 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
669 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
670 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
671 */
MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])672 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
673 {
674 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
675 Mat A = aij->A; /* diagonal part of the matrix */
676 Mat B = aij->B; /* offdiagonal part of the matrix */
677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
678 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
679 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
680 PetscInt *ailen = a->ilen,*aj = a->j;
681 PetscInt *bilen = b->ilen,*bj = b->j;
682 PetscInt am = aij->A->rmap->n,j;
683 PetscInt diag_so_far = 0,dnz;
684 PetscInt offd_so_far = 0,onz;
685
686 PetscFunctionBegin;
687 /* Iterate over all rows of the matrix */
688 for (j=0; j<am; j++) {
689 dnz = onz = 0;
690 /* Iterate over all non-zero columns of the current row */
691 for (col=mat_i[j]; col<mat_i[j+1]; col++) {
692 /* If column is in the diagonal */
693 if (mat_j[col] >= cstart && mat_j[col] < cend) {
694 aj[diag_so_far++] = mat_j[col] - cstart;
695 dnz++;
696 } else { /* off-diagonal entries */
697 bj[offd_so_far++] = mat_j[col];
698 onz++;
699 }
700 }
701 ailen[j] = dnz;
702 bilen[j] = onz;
703 }
704 PetscFunctionReturn(0);
705 }
706
707 /*
708 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
709 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
710 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
711 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
712 would not be true and the more complex MatSetValues_MPIAIJ has to be used.
713 */
MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])714 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
715 {
716 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
717 Mat A = aij->A; /* diagonal part of the matrix */
718 Mat B = aij->B; /* offdiagonal part of the matrix */
719 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
720 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
721 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
722 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend;
723 PetscInt *ailen = a->ilen,*aj = a->j;
724 PetscInt *bilen = b->ilen,*bj = b->j;
725 PetscInt am = aij->A->rmap->n,j;
726 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
727 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
728 PetscScalar *aa = a->a,*ba = b->a;
729
730 PetscFunctionBegin;
731 /* Iterate over all rows of the matrix */
732 for (j=0; j<am; j++) {
733 dnz_row = onz_row = 0;
734 rowstart_offd = full_offd_i[j];
735 rowstart_diag = full_diag_i[j];
736 /* Iterate over all non-zero columns of the current row */
737 for (col=mat_i[j]; col<mat_i[j+1]; col++) {
738 /* If column is in the diagonal */
739 if (mat_j[col] >= cstart && mat_j[col] < cend) {
740 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
741 aa[rowstart_diag+dnz_row] = mat_a[col];
742 dnz_row++;
743 } else { /* off-diagonal entries */
744 bj[rowstart_offd+onz_row] = mat_j[col];
745 ba[rowstart_offd+onz_row] = mat_a[col];
746 onz_row++;
747 }
748 }
749 ailen[j] = dnz_row;
750 bilen[j] = onz_row;
751 }
752 PetscFunctionReturn(0);
753 }
754
MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
756 {
757 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
758 PetscErrorCode ierr;
759 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
760 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
761
762 PetscFunctionBegin;
763 for (i=0; i<m; i++) {
764 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
765 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
766 if (idxm[i] >= rstart && idxm[i] < rend) {
767 row = idxm[i] - rstart;
768 for (j=0; j<n; j++) {
769 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
770 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
771 if (idxn[j] >= cstart && idxn[j] < cend) {
772 col = idxn[j] - cstart;
773 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774 } else {
775 if (!aij->colmap) {
776 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
777 }
778 #if defined(PETSC_USE_CTABLE)
779 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
780 col--;
781 #else
782 col = aij->colmap[idxn[j]] - 1;
783 #endif
784 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
785 else {
786 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
787 }
788 }
789 }
790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
791 }
792 PetscFunctionReturn(0);
793 }
794
MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)795 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
796 {
797 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
798 PetscErrorCode ierr;
799 PetscInt nstash,reallocs;
800
801 PetscFunctionBegin;
802 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
803
804 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
805 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
806 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
807 PetscFunctionReturn(0);
808 }
809
MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)810 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
811 {
812 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
813 PetscErrorCode ierr;
814 PetscMPIInt n;
815 PetscInt i,j,rstart,ncols,flg;
816 PetscInt *row,*col;
817 PetscBool other_disassembled;
818 PetscScalar *val;
819
820 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
821
822 PetscFunctionBegin;
823 if (!aij->donotstash && !mat->nooffprocentries) {
824 while (1) {
825 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
826 if (!flg) break;
827
828 for (i=0; i<n;) {
829 /* Now identify the consecutive vals belonging to the same row */
830 for (j=i,rstart=row[j]; j<n; j++) {
831 if (row[j] != rstart) break;
832 }
833 if (j < n) ncols = j-i;
834 else ncols = n-i;
835 /* Now assemble all these values with a single function call */
836 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
837 i = j;
838 }
839 }
840 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
841 }
842 #if defined(PETSC_HAVE_DEVICE)
843 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
844 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
845 if (mat->boundtocpu) {
846 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
847 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
848 }
849 #endif
850 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
851 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
852
853 /* determine if any processor has disassembled, if so we must
854 also disassemble ourself, in order that we may reassemble. */
855 /*
856 if nonzero structure of submatrix B cannot change then we know that
857 no processor disassembled thus we can skip this stuff
858 */
859 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
860 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
861 if (mat->was_assembled && !other_disassembled) {
862 #if defined(PETSC_HAVE_DEVICE)
863 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
864 #endif
865 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
866 }
867 }
868 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
869 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
870 }
871 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
872 #if defined(PETSC_HAVE_DEVICE)
873 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
874 #endif
875 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
876 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
877
878 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
879
880 aij->rowvalues = NULL;
881
882 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
883
884 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
885 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
886 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
887 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
888 }
889 #if defined(PETSC_HAVE_DEVICE)
890 mat->offloadmask = PETSC_OFFLOAD_BOTH;
891 #endif
892 PetscFunctionReturn(0);
893 }
894
MatZeroEntries_MPIAIJ(Mat A)895 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
896 {
897 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
898 PetscErrorCode ierr;
899
900 PetscFunctionBegin;
901 ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
902 ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
903 PetscFunctionReturn(0);
904 }
905
MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)906 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data;
909 PetscObjectState sA, sB;
910 PetscInt *lrows;
911 PetscInt r, len;
912 PetscBool cong, lch, gch;
913 PetscErrorCode ierr;
914
915 PetscFunctionBegin;
916 /* get locally owned rows */
917 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
918 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
919 /* fix right hand side if needed */
920 if (x && b) {
921 const PetscScalar *xx;
922 PetscScalar *bb;
923
924 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
925 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
926 ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
927 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
928 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
929 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
930 }
931
932 sA = mat->A->nonzerostate;
933 sB = mat->B->nonzerostate;
934
935 if (diag != 0.0 && cong) {
936 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
937 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
938 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
939 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
940 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
941 PetscInt nnwA, nnwB;
942 PetscBool nnzA, nnzB;
943
944 nnwA = aijA->nonew;
945 nnwB = aijB->nonew;
946 nnzA = aijA->keepnonzeropattern;
947 nnzB = aijB->keepnonzeropattern;
948 if (!nnzA) {
949 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
950 aijA->nonew = 0;
951 }
952 if (!nnzB) {
953 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
954 aijB->nonew = 0;
955 }
956 /* Must zero here before the next loop */
957 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
958 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
959 for (r = 0; r < len; ++r) {
960 const PetscInt row = lrows[r] + A->rmap->rstart;
961 if (row >= A->cmap->N) continue;
962 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
963 }
964 aijA->nonew = nnwA;
965 aijB->nonew = nnwB;
966 } else {
967 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
968 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
969 }
970 ierr = PetscFree(lrows);CHKERRQ(ierr);
971 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973
974 /* reduce nonzerostate */
975 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
976 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
977 if (gch) A->nonzerostate++;
978 PetscFunctionReturn(0);
979 }
980
MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)981 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
982 {
983 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
984 PetscErrorCode ierr;
985 PetscMPIInt n = A->rmap->n;
986 PetscInt i,j,r,m,len = 0;
987 PetscInt *lrows,*owners = A->rmap->range;
988 PetscMPIInt p = 0;
989 PetscSFNode *rrows;
990 PetscSF sf;
991 const PetscScalar *xx;
992 PetscScalar *bb,*mask;
993 Vec xmask,lmask;
994 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data;
995 const PetscInt *aj, *ii,*ridx;
996 PetscScalar *aa;
997
998 PetscFunctionBegin;
999 /* Create SF where leaves are input rows and roots are owned rows */
1000 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1001 for (r = 0; r < n; ++r) lrows[r] = -1;
1002 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1003 for (r = 0; r < N; ++r) {
1004 const PetscInt idx = rows[r];
1005 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1006 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1007 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1008 }
1009 rrows[r].rank = p;
1010 rrows[r].index = rows[r] - owners[p];
1011 }
1012 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1013 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1014 /* Collect flags for rows to be zeroed */
1015 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1016 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1017 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1018 /* Compress and put in row numbers */
1019 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1020 /* zero diagonal part of matrix */
1021 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1022 /* handle off diagonal part of matrix */
1023 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026 for (i=0; i<len; i++) bb[lrows[i]] = 1;
1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031 if (x && b) { /* this code is buggy when the row and column layout don't match */
1032 PetscBool cong;
1033
1034 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
1035 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
1036 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1039 ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1040 }
1041 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1042 /* remove zeroed rows of off diagonal matrix */
1043 ii = aij->i;
1044 for (i=0; i<len; i++) {
1045 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
1046 }
1047 /* loop over all elements of off process part of matrix zeroing removed columns*/
1048 if (aij->compressedrow.use) {
1049 m = aij->compressedrow.nrows;
1050 ii = aij->compressedrow.i;
1051 ridx = aij->compressedrow.rindex;
1052 for (i=0; i<m; i++) {
1053 n = ii[i+1] - ii[i];
1054 aj = aij->j + ii[i];
1055 aa = aij->a + ii[i];
1056
1057 for (j=0; j<n; j++) {
1058 if (PetscAbsScalar(mask[*aj])) {
1059 if (b) bb[*ridx] -= *aa*xx[*aj];
1060 *aa = 0.0;
1061 }
1062 aa++;
1063 aj++;
1064 }
1065 ridx++;
1066 }
1067 } else { /* do not use compressed row format */
1068 m = l->B->rmap->n;
1069 for (i=0; i<m; i++) {
1070 n = ii[i+1] - ii[i];
1071 aj = aij->j + ii[i];
1072 aa = aij->a + ii[i];
1073 for (j=0; j<n; j++) {
1074 if (PetscAbsScalar(mask[*aj])) {
1075 if (b) bb[i] -= *aa*xx[*aj];
1076 *aa = 0.0;
1077 }
1078 aa++;
1079 aj++;
1080 }
1081 }
1082 }
1083 if (x && b) {
1084 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1085 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1086 }
1087 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1088 ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1089 ierr = PetscFree(lrows);CHKERRQ(ierr);
1090
1091 /* only change matrix nonzero state if pattern was allowed to be changed */
1092 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
1093 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1094 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1095 }
1096 PetscFunctionReturn(0);
1097 }
1098
MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)1099 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1100 {
1101 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1102 PetscErrorCode ierr;
1103 PetscInt nt;
1104 VecScatter Mvctx = a->Mvctx;
1105
1106 PetscFunctionBegin;
1107 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1108 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1109 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1111 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1113 PetscFunctionReturn(0);
1114 }
1115
MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)1116 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1117 {
1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1119 PetscErrorCode ierr;
1120
1121 PetscFunctionBegin;
1122 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1123 PetscFunctionReturn(0);
1124 }
1125
MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)1126 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1127 {
1128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1129 PetscErrorCode ierr;
1130 VecScatter Mvctx = a->Mvctx;
1131
1132 PetscFunctionBegin;
1133 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
1134 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1135 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1136 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1137 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1138 PetscFunctionReturn(0);
1139 }
1140
MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)1141 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1142 {
1143 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1144 PetscErrorCode ierr;
1145
1146 PetscFunctionBegin;
1147 /* do nondiagonal part */
1148 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1149 /* do local part */
1150 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1151 /* add partial results together */
1152 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1153 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1154 PetscFunctionReturn(0);
1155 }
1156
MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool * f)1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
1158 {
1159 MPI_Comm comm;
1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1162 IS Me,Notme;
1163 PetscErrorCode ierr;
1164 PetscInt M,N,first,last,*notme,i;
1165 PetscBool lf;
1166 PetscMPIInt size;
1167
1168 PetscFunctionBegin;
1169 /* Easy test: symmetric diagonal block */
1170 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1171 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1172 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1173 if (!*f) PetscFunctionReturn(0);
1174 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1175 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1176 if (size == 1) PetscFunctionReturn(0);
1177
1178 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1179 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1180 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1181 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr);
1182 for (i=0; i<first; i++) notme[i] = i;
1183 for (i=last; i<M; i++) notme[i-last+first] = i;
1184 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1185 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1186 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1187 Aoff = Aoffs[0];
1188 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1189 Boff = Boffs[0];
1190 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1191 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1192 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1193 ierr = ISDestroy(&Me);CHKERRQ(ierr);
1194 ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1195 ierr = PetscFree(notme);CHKERRQ(ierr);
1196 PetscFunctionReturn(0);
1197 }
1198
MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool * f)1199 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
1200 {
1201 PetscErrorCode ierr;
1202
1203 PetscFunctionBegin;
1204 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1205 PetscFunctionReturn(0);
1206 }
1207
MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)1208 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1209 {
1210 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1211 PetscErrorCode ierr;
1212
1213 PetscFunctionBegin;
1214 /* do nondiagonal part */
1215 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1216 /* do local part */
1217 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1218 /* add partial results together */
1219 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1221 PetscFunctionReturn(0);
1222 }
1223
1224 /*
1225 This only works correctly for square matrices where the subblock A->A is the
1226 diagonal block
1227 */
MatGetDiagonal_MPIAIJ(Mat A,Vec v)1228 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1229 {
1230 PetscErrorCode ierr;
1231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1232
1233 PetscFunctionBegin;
1234 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1235 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1236 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1237 PetscFunctionReturn(0);
1238 }
1239
MatScale_MPIAIJ(Mat A,PetscScalar aa)1240 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1241 {
1242 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1243 PetscErrorCode ierr;
1244
1245 PetscFunctionBegin;
1246 ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1247 ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1248 PetscFunctionReturn(0);
1249 }
1250
MatDestroy_MPIAIJ(Mat mat)1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1252 {
1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1254 PetscErrorCode ierr;
1255
1256 PetscFunctionBegin;
1257 #if defined(PETSC_USE_LOG)
1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1259 #endif
1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1264 #if defined(PETSC_USE_CTABLE)
1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1266 #else
1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1268 #endif
1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1272 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1273 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1274 ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1275 ierr = PetscFree(mat->data);CHKERRQ(ierr);
1276
1277 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1278 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1279
1280 ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1290 #if defined(PETSC_HAVE_CUDA)
1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1292 #endif
1293 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1295 #endif
1296 #if defined(PETSC_HAVE_ELEMENTAL)
1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1298 #endif
1299 #if defined(PETSC_HAVE_SCALAPACK)
1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1301 #endif
1302 #if defined(PETSC_HAVE_HYPRE)
1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1305 #endif
1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1307 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1308 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1309 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1310 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1311 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1312 #if defined(PETSC_HAVE_MKL_SPARSE)
1313 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1314 #endif
1315 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1316 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1317 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1318 PetscFunctionReturn(0);
1319 }
1320
MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)1321 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1322 {
1323 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1324 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data;
1325 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data;
1326 const PetscInt *garray = aij->garray;
1327 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1328 PetscInt *rowlens;
1329 PetscInt *colidxs;
1330 PetscScalar *matvals;
1331 PetscErrorCode ierr;
1332
1333 PetscFunctionBegin;
1334 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1335
1336 M = mat->rmap->N;
1337 N = mat->cmap->N;
1338 m = mat->rmap->n;
1339 rs = mat->rmap->rstart;
1340 cs = mat->cmap->rstart;
1341 nz = A->nz + B->nz;
1342
1343 /* write matrix header */
1344 header[0] = MAT_FILE_CLASSID;
1345 header[1] = M; header[2] = N; header[3] = nz;
1346 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1347 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1348
1349 /* fill in and store row lengths */
1350 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1351 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1352 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1353 ierr = PetscFree(rowlens);CHKERRQ(ierr);
1354
1355 /* fill in and store column indices */
1356 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1357 for (cnt=0, i=0; i<m; i++) {
1358 for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1359 if (garray[B->j[jb]] > cs) break;
1360 colidxs[cnt++] = garray[B->j[jb]];
1361 }
1362 for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1363 colidxs[cnt++] = A->j[ja] + cs;
1364 for (; jb<B->i[i+1]; jb++)
1365 colidxs[cnt++] = garray[B->j[jb]];
1366 }
1367 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1368 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1369 ierr = PetscFree(colidxs);CHKERRQ(ierr);
1370
1371 /* fill in and store nonzero values */
1372 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1373 for (cnt=0, i=0; i<m; i++) {
1374 for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1375 if (garray[B->j[jb]] > cs) break;
1376 matvals[cnt++] = B->a[jb];
1377 }
1378 for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1379 matvals[cnt++] = A->a[ja];
1380 for (; jb<B->i[i+1]; jb++)
1381 matvals[cnt++] = B->a[jb];
1382 }
1383 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1384 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1385 ierr = PetscFree(matvals);CHKERRQ(ierr);
1386
1387 /* write block size option to the viewer's .info file */
1388 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1389 PetscFunctionReturn(0);
1390 }
1391
1392 #include <petscdraw.h>
MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)1393 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1394 {
1395 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1396 PetscErrorCode ierr;
1397 PetscMPIInt rank = aij->rank,size = aij->size;
1398 PetscBool isdraw,iascii,isbinary;
1399 PetscViewer sviewer;
1400 PetscViewerFormat format;
1401
1402 PetscFunctionBegin;
1403 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1404 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1405 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1406 if (iascii) {
1407 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1408 if (format == PETSC_VIEWER_LOAD_BALANCE) {
1409 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1410 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1411 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1412 for (i=0; i<(PetscInt)size; i++) {
1413 nmax = PetscMax(nmax,nz[i]);
1414 nmin = PetscMin(nmin,nz[i]);
1415 navg += nz[i];
1416 }
1417 ierr = PetscFree(nz);CHKERRQ(ierr);
1418 navg = navg/size;
1419 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1420 PetscFunctionReturn(0);
1421 }
1422 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1423 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1424 MatInfo info;
1425 PetscBool inodes;
1426
1427 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1428 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1429 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1430 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1431 if (!inodes) {
1432 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1433 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1434 } else {
1435 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1436 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1437 }
1438 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1439 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1440 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1441 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1442 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1443 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1444 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1445 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1446 PetscFunctionReturn(0);
1447 } else if (format == PETSC_VIEWER_ASCII_INFO) {
1448 PetscInt inodecount,inodelimit,*inodes;
1449 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1450 if (inodes) {
1451 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1452 } else {
1453 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1454 }
1455 PetscFunctionReturn(0);
1456 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1457 PetscFunctionReturn(0);
1458 }
1459 } else if (isbinary) {
1460 if (size == 1) {
1461 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1462 ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1463 } else {
1464 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1465 }
1466 PetscFunctionReturn(0);
1467 } else if (iascii && size == 1) {
1468 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1469 ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1470 PetscFunctionReturn(0);
1471 } else if (isdraw) {
1472 PetscDraw draw;
1473 PetscBool isnull;
1474 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1475 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1476 if (isnull) PetscFunctionReturn(0);
1477 }
1478
1479 { /* assemble the entire matrix onto first processor */
1480 Mat A = NULL, Av;
1481 IS isrow,iscol;
1482
1483 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1484 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1485 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1486 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1487 /* The commented code uses MatCreateSubMatrices instead */
1488 /*
1489 Mat *AA, A = NULL, Av;
1490 IS isrow,iscol;
1491
1492 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1493 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1494 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1495 if (!rank) {
1496 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1497 A = AA[0];
1498 Av = AA[0];
1499 }
1500 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1501 */
1502 ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1503 ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1504 /*
1505 Everyone has to call to draw the matrix since the graphics waits are
1506 synchronized across all processors that share the PetscDraw object
1507 */
1508 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1509 if (!rank) {
1510 if (((PetscObject)mat)->name) {
1511 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1512 }
1513 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1514 }
1515 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1516 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1517 ierr = MatDestroy(&A);CHKERRQ(ierr);
1518 }
1519 PetscFunctionReturn(0);
1520 }
1521
MatView_MPIAIJ(Mat mat,PetscViewer viewer)1522 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1523 {
1524 PetscErrorCode ierr;
1525 PetscBool iascii,isdraw,issocket,isbinary;
1526
1527 PetscFunctionBegin;
1528 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1529 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1530 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1531 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1532 if (iascii || isdraw || isbinary || issocket) {
1533 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1534 }
1535 PetscFunctionReturn(0);
1536 }
1537
MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)1538 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1539 {
1540 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1541 PetscErrorCode ierr;
1542 Vec bb1 = NULL;
1543 PetscBool hasop;
1544
1545 PetscFunctionBegin;
1546 if (flag == SOR_APPLY_UPPER) {
1547 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1548 PetscFunctionReturn(0);
1549 }
1550
1551 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1552 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1553 }
1554
1555 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1556 if (flag & SOR_ZERO_INITIAL_GUESS) {
1557 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1558 its--;
1559 }
1560
1561 while (its--) {
1562 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1564
1565 /* update rhs: bb1 = bb - B*x */
1566 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1567 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1568
1569 /* local sweep */
1570 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1571 }
1572 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1573 if (flag & SOR_ZERO_INITIAL_GUESS) {
1574 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1575 its--;
1576 }
1577 while (its--) {
1578 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580
1581 /* update rhs: bb1 = bb - B*x */
1582 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1583 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1584
1585 /* local sweep */
1586 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1587 }
1588 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1589 if (flag & SOR_ZERO_INITIAL_GUESS) {
1590 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1591 its--;
1592 }
1593 while (its--) {
1594 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1595 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1596
1597 /* update rhs: bb1 = bb - B*x */
1598 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1599 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1600
1601 /* local sweep */
1602 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1603 }
1604 } else if (flag & SOR_EISENSTAT) {
1605 Vec xx1;
1606
1607 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1608 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1609
1610 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1611 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612 if (!mat->diag) {
1613 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1614 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1615 }
1616 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1617 if (hasop) {
1618 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1619 } else {
1620 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1621 }
1622 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1623
1624 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1625
1626 /* local sweep */
1627 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1628 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1629 ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1630 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1631
1632 ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1633
1634 matin->factorerrortype = mat->A->factorerrortype;
1635 PetscFunctionReturn(0);
1636 }
1637
MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat * B)1638 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1639 {
1640 Mat aA,aB,Aperm;
1641 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1642 PetscScalar *aa,*ba;
1643 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1644 PetscSF rowsf,sf;
1645 IS parcolp = NULL;
1646 PetscBool done;
1647 PetscErrorCode ierr;
1648
1649 PetscFunctionBegin;
1650 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1651 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1652 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1653 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1654
1655 /* Invert row permutation to find out where my rows should go */
1656 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1657 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1658 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1659 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1660 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1661 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1662
1663 /* Invert column permutation to find out where my columns should go */
1664 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1665 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1666 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1667 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1668 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1669 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1670 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1671
1672 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1673 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1674 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1675
1676 /* Find out where my gcols should go */
1677 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1678 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1679 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1680 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1681 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1682 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1683 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1684 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1685
1686 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1687 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1688 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1689 for (i=0; i<m; i++) {
1690 PetscInt row = rdest[i];
1691 PetscMPIInt rowner;
1692 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1693 for (j=ai[i]; j<ai[i+1]; j++) {
1694 PetscInt col = cdest[aj[j]];
1695 PetscMPIInt cowner;
1696 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1697 if (rowner == cowner) dnnz[i]++;
1698 else onnz[i]++;
1699 }
1700 for (j=bi[i]; j<bi[i+1]; j++) {
1701 PetscInt col = gcdest[bj[j]];
1702 PetscMPIInt cowner;
1703 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1704 if (rowner == cowner) dnnz[i]++;
1705 else onnz[i]++;
1706 }
1707 }
1708 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1709 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1710 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1711 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1712 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1713
1714 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1715 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1716 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1717 for (i=0; i<m; i++) {
1718 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1719 PetscInt j0,rowlen;
1720 rowlen = ai[i+1] - ai[i];
1721 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1722 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1723 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1724 }
1725 rowlen = bi[i+1] - bi[i];
1726 for (j0=j=0; j<rowlen; j0=j) {
1727 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1728 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1729 }
1730 }
1731 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1732 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1733 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1734 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1735 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1736 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1737 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1738 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1739 ierr = PetscFree(gcdest);CHKERRQ(ierr);
1740 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1741 *B = Aperm;
1742 PetscFunctionReturn(0);
1743 }
1744
MatGetGhosts_MPIAIJ(Mat mat,PetscInt * nghosts,const PetscInt * ghosts[])1745 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1746 {
1747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1748 PetscErrorCode ierr;
1749
1750 PetscFunctionBegin;
1751 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1752 if (ghosts) *ghosts = aij->garray;
1753 PetscFunctionReturn(0);
1754 }
1755
MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo * info)1756 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1757 {
1758 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1759 Mat A = mat->A,B = mat->B;
1760 PetscErrorCode ierr;
1761 PetscLogDouble isend[5],irecv[5];
1762
1763 PetscFunctionBegin;
1764 info->block_size = 1.0;
1765 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1766
1767 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1768 isend[3] = info->memory; isend[4] = info->mallocs;
1769
1770 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1771
1772 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1773 isend[3] += info->memory; isend[4] += info->mallocs;
1774 if (flag == MAT_LOCAL) {
1775 info->nz_used = isend[0];
1776 info->nz_allocated = isend[1];
1777 info->nz_unneeded = isend[2];
1778 info->memory = isend[3];
1779 info->mallocs = isend[4];
1780 } else if (flag == MAT_GLOBAL_MAX) {
1781 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1782
1783 info->nz_used = irecv[0];
1784 info->nz_allocated = irecv[1];
1785 info->nz_unneeded = irecv[2];
1786 info->memory = irecv[3];
1787 info->mallocs = irecv[4];
1788 } else if (flag == MAT_GLOBAL_SUM) {
1789 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1790
1791 info->nz_used = irecv[0];
1792 info->nz_allocated = irecv[1];
1793 info->nz_unneeded = irecv[2];
1794 info->memory = irecv[3];
1795 info->mallocs = irecv[4];
1796 }
1797 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1798 info->fill_ratio_needed = 0;
1799 info->factor_mallocs = 0;
1800 PetscFunctionReturn(0);
1801 }
1802
MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)1803 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1804 {
1805 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1806 PetscErrorCode ierr;
1807
1808 PetscFunctionBegin;
1809 switch (op) {
1810 case MAT_NEW_NONZERO_LOCATIONS:
1811 case MAT_NEW_NONZERO_ALLOCATION_ERR:
1812 case MAT_UNUSED_NONZERO_LOCATION_ERR:
1813 case MAT_KEEP_NONZERO_PATTERN:
1814 case MAT_NEW_NONZERO_LOCATION_ERR:
1815 case MAT_USE_INODES:
1816 case MAT_IGNORE_ZERO_ENTRIES:
1817 MatCheckPreallocated(A,1);
1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1819 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1820 break;
1821 case MAT_ROW_ORIENTED:
1822 MatCheckPreallocated(A,1);
1823 a->roworiented = flg;
1824
1825 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1826 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1827 break;
1828 case MAT_NEW_DIAGONALS:
1829 case MAT_SORTED_FULL:
1830 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1831 break;
1832 case MAT_IGNORE_OFF_PROC_ENTRIES:
1833 a->donotstash = flg;
1834 break;
1835 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1836 case MAT_SPD:
1837 case MAT_SYMMETRIC:
1838 case MAT_STRUCTURALLY_SYMMETRIC:
1839 case MAT_HERMITIAN:
1840 case MAT_SYMMETRY_ETERNAL:
1841 break;
1842 case MAT_SUBMAT_SINGLEIS:
1843 A->submat_singleis = flg;
1844 break;
1845 case MAT_STRUCTURE_ONLY:
1846 /* The option is handled directly by MatSetOption() */
1847 break;
1848 default:
1849 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1850 }
1851 PetscFunctionReturn(0);
1852 }
1853
MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1854 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1855 {
1856 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1857 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1858 PetscErrorCode ierr;
1859 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1860 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1861 PetscInt *cmap,*idx_p;
1862
1863 PetscFunctionBegin;
1864 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1865 mat->getrowactive = PETSC_TRUE;
1866
1867 if (!mat->rowvalues && (idx || v)) {
1868 /*
1869 allocate enough space to hold information from the longest row.
1870 */
1871 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1872 PetscInt max = 1,tmp;
1873 for (i=0; i<matin->rmap->n; i++) {
1874 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1875 if (max < tmp) max = tmp;
1876 }
1877 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1878 }
1879
1880 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1881 lrow = row - rstart;
1882
1883 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1884 if (!v) {pvA = NULL; pvB = NULL;}
1885 if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1886 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1887 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1888 nztot = nzA + nzB;
1889
1890 cmap = mat->garray;
1891 if (v || idx) {
1892 if (nztot) {
1893 /* Sort by increasing column numbers, assuming A and B already sorted */
1894 PetscInt imark = -1;
1895 if (v) {
1896 *v = v_p = mat->rowvalues;
1897 for (i=0; i<nzB; i++) {
1898 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1899 else break;
1900 }
1901 imark = i;
1902 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1903 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1904 }
1905 if (idx) {
1906 *idx = idx_p = mat->rowindices;
1907 if (imark > -1) {
1908 for (i=0; i<imark; i++) {
1909 idx_p[i] = cmap[cworkB[i]];
1910 }
1911 } else {
1912 for (i=0; i<nzB; i++) {
1913 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1914 else break;
1915 }
1916 imark = i;
1917 }
1918 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1919 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1920 }
1921 } else {
1922 if (idx) *idx = NULL;
1923 if (v) *v = NULL;
1924 }
1925 }
1926 *nz = nztot;
1927 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1928 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1929 PetscFunctionReturn(0);
1930 }
1931
MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt * nz,PetscInt ** idx,PetscScalar ** v)1932 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1933 {
1934 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1935
1936 PetscFunctionBegin;
1937 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1938 aij->getrowactive = PETSC_FALSE;
1939 PetscFunctionReturn(0);
1940 }
1941
MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal * norm)1942 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1943 {
1944 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1945 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1946 PetscErrorCode ierr;
1947 PetscInt i,j,cstart = mat->cmap->rstart;
1948 PetscReal sum = 0.0;
1949 MatScalar *v;
1950
1951 PetscFunctionBegin;
1952 if (aij->size == 1) {
1953 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1954 } else {
1955 if (type == NORM_FROBENIUS) {
1956 v = amat->a;
1957 for (i=0; i<amat->nz; i++) {
1958 sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1959 }
1960 v = bmat->a;
1961 for (i=0; i<bmat->nz; i++) {
1962 sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1963 }
1964 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1965 *norm = PetscSqrtReal(*norm);
1966 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1967 } else if (type == NORM_1) { /* max column norm */
1968 PetscReal *tmp,*tmp2;
1969 PetscInt *jj,*garray = aij->garray;
1970 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1971 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1972 *norm = 0.0;
1973 v = amat->a; jj = amat->j;
1974 for (j=0; j<amat->nz; j++) {
1975 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
1976 }
1977 v = bmat->a; jj = bmat->j;
1978 for (j=0; j<bmat->nz; j++) {
1979 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1980 }
1981 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1982 for (j=0; j<mat->cmap->N; j++) {
1983 if (tmp2[j] > *norm) *norm = tmp2[j];
1984 }
1985 ierr = PetscFree(tmp);CHKERRQ(ierr);
1986 ierr = PetscFree(tmp2);CHKERRQ(ierr);
1987 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1988 } else if (type == NORM_INFINITY) { /* max row norm */
1989 PetscReal ntemp = 0.0;
1990 for (j=0; j<aij->A->rmap->n; j++) {
1991 v = amat->a + amat->i[j];
1992 sum = 0.0;
1993 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1994 sum += PetscAbsScalar(*v); v++;
1995 }
1996 v = bmat->a + bmat->i[j];
1997 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1998 sum += PetscAbsScalar(*v); v++;
1999 }
2000 if (sum > ntemp) ntemp = sum;
2001 }
2002 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2003 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
2004 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2005 }
2006 PetscFunctionReturn(0);
2007 }
2008
MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat * matout)2009 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2010 {
2011 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b;
2012 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
2013 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
2014 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i;
2015 PetscErrorCode ierr;
2016 Mat B,A_diag,*B_diag;
2017 const MatScalar *array;
2018
2019 PetscFunctionBegin;
2020 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2021 ai = Aloc->i; aj = Aloc->j;
2022 bi = Bloc->i; bj = Bloc->j;
2023 if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2024 PetscInt *d_nnz,*g_nnz,*o_nnz;
2025 PetscSFNode *oloc;
2026 PETSC_UNUSED PetscSF sf;
2027
2028 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
2029 /* compute d_nnz for preallocation */
2030 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
2031 for (i=0; i<ai[ma]; i++) {
2032 d_nnz[aj[i]]++;
2033 }
2034 /* compute local off-diagonal contributions */
2035 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
2036 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2037 /* map those to global */
2038 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2039 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2040 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2041 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
2042 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2043 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2044 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2045
2046 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2047 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2048 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2049 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2050 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2051 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2052 } else {
2053 B = *matout;
2054 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2055 }
2056
2057 b = (Mat_MPIAIJ*)B->data;
2058 A_diag = a->A;
2059 B_diag = &b->A;
2060 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data;
2061 A_diag_ncol = A_diag->cmap->N;
2062 B_diag_ilen = sub_B_diag->ilen;
2063 B_diag_i = sub_B_diag->i;
2064
2065 /* Set ilen for diagonal of B */
2066 for (i=0; i<A_diag_ncol; i++) {
2067 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
2068 }
2069
2070 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
2071 very quickly (=without using MatSetValues), because all writes are local. */
2072 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
2073
2074 /* copy over the B part */
2075 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
2076 array = Bloc->a;
2077 row = A->rmap->rstart;
2078 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2079 cols_tmp = cols;
2080 for (i=0; i<mb; i++) {
2081 ncol = bi[i+1]-bi[i];
2082 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2083 row++;
2084 array += ncol; cols_tmp += ncol;
2085 }
2086 ierr = PetscFree(cols);CHKERRQ(ierr);
2087
2088 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2090 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2091 *matout = B;
2092 } else {
2093 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2094 }
2095 PetscFunctionReturn(0);
2096 }
2097
MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)2098 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2099 {
2100 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2101 Mat a = aij->A,b = aij->B;
2102 PetscErrorCode ierr;
2103 PetscInt s1,s2,s3;
2104
2105 PetscFunctionBegin;
2106 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2107 if (rr) {
2108 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2109 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2110 /* Overlap communication with computation. */
2111 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2112 }
2113 if (ll) {
2114 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2115 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2116 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
2117 }
2118 /* scale the diagonal block */
2119 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2120
2121 if (rr) {
2122 /* Do a scatter end and then right scale the off-diagonal block */
2123 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2124 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
2125 }
2126 PetscFunctionReturn(0);
2127 }
2128
MatSetUnfactored_MPIAIJ(Mat A)2129 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2130 {
2131 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2132 PetscErrorCode ierr;
2133
2134 PetscFunctionBegin;
2135 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2136 PetscFunctionReturn(0);
2137 }
2138
MatEqual_MPIAIJ(Mat A,Mat B,PetscBool * flag)2139 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
2140 {
2141 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2142 Mat a,b,c,d;
2143 PetscBool flg;
2144 PetscErrorCode ierr;
2145
2146 PetscFunctionBegin;
2147 a = matA->A; b = matA->B;
2148 c = matB->A; d = matB->B;
2149
2150 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2151 if (flg) {
2152 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2153 }
2154 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2155 PetscFunctionReturn(0);
2156 }
2157
MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)2158 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2159 {
2160 PetscErrorCode ierr;
2161 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2162 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data;
2163
2164 PetscFunctionBegin;
2165 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2166 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2167 /* because of the column compression in the off-processor part of the matrix a->B,
2168 the number of columns in a->B and b->B may be different, hence we cannot call
2169 the MatCopy() directly on the two parts. If need be, we can provide a more
2170 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2171 then copying the submatrices */
2172 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2173 } else {
2174 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2175 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2176 }
2177 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2178 PetscFunctionReturn(0);
2179 }
2180
MatSetUp_MPIAIJ(Mat A)2181 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2182 {
2183 PetscErrorCode ierr;
2184
2185 PetscFunctionBegin;
2186 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2187 PetscFunctionReturn(0);
2188 }
2189
2190 /*
2191 Computes the number of nonzeros per row needed for preallocation when X and Y
2192 have different nonzero structure.
2193 */
MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt * xi,const PetscInt * xj,const PetscInt * xltog,const PetscInt * yi,const PetscInt * yj,const PetscInt * yltog,PetscInt * nnz)2194 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2195 {
2196 PetscInt i,j,k,nzx,nzy;
2197
2198 PetscFunctionBegin;
2199 /* Set the number of nonzeros in the new matrix */
2200 for (i=0; i<m; i++) {
2201 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2202 nzx = xi[i+1] - xi[i];
2203 nzy = yi[i+1] - yi[i];
2204 nnz[i] = 0;
2205 for (j=0,k=0; j<nzx; j++) { /* Point in X */
2206 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2207 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */
2208 nnz[i]++;
2209 }
2210 for (; k<nzy; k++) nnz[i]++;
2211 }
2212 PetscFunctionReturn(0);
2213 }
2214
2215 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt * yltog,Mat X,const PetscInt * xltog,PetscInt * nnz)2216 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2217 {
2218 PetscErrorCode ierr;
2219 PetscInt m = Y->rmap->N;
2220 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data;
2221 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data;
2222
2223 PetscFunctionBegin;
2224 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2225 PetscFunctionReturn(0);
2226 }
2227
MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)2228 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2229 {
2230 PetscErrorCode ierr;
2231 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2232 PetscBLASInt bnz,one=1;
2233 Mat_SeqAIJ *x,*y;
2234
2235 PetscFunctionBegin;
2236 if (str == SAME_NONZERO_PATTERN) {
2237 PetscScalar alpha = a;
2238 x = (Mat_SeqAIJ*)xx->A->data;
2239 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2240 y = (Mat_SeqAIJ*)yy->A->data;
2241 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2242 x = (Mat_SeqAIJ*)xx->B->data;
2243 y = (Mat_SeqAIJ*)yy->B->data;
2244 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2245 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2246 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2247 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU
2248 will be updated */
2249 #if defined(PETSC_HAVE_DEVICE)
2250 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) {
2251 Y->offloadmask = PETSC_OFFLOAD_CPU;
2252 }
2253 #endif
2254 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2255 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2256 } else {
2257 Mat B;
2258 PetscInt *nnz_d,*nnz_o;
2259 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2260 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2261 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2262 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2263 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2264 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2265 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2266 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2267 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2268 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2269 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2270 ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2271 ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2272 }
2273 PetscFunctionReturn(0);
2274 }
2275
2276 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2277
MatConjugate_MPIAIJ(Mat mat)2278 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2279 {
2280 #if defined(PETSC_USE_COMPLEX)
2281 PetscErrorCode ierr;
2282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2283
2284 PetscFunctionBegin;
2285 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2286 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2287 #else
2288 PetscFunctionBegin;
2289 #endif
2290 PetscFunctionReturn(0);
2291 }
2292
MatRealPart_MPIAIJ(Mat A)2293 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2294 {
2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2296 PetscErrorCode ierr;
2297
2298 PetscFunctionBegin;
2299 ierr = MatRealPart(a->A);CHKERRQ(ierr);
2300 ierr = MatRealPart(a->B);CHKERRQ(ierr);
2301 PetscFunctionReturn(0);
2302 }
2303
MatImaginaryPart_MPIAIJ(Mat A)2304 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2305 {
2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2307 PetscErrorCode ierr;
2308
2309 PetscFunctionBegin;
2310 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2311 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2312 PetscFunctionReturn(0);
2313 }
2314
MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])2315 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2316 {
2317 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2318 PetscErrorCode ierr;
2319 PetscInt i,*idxb = NULL,m = A->rmap->n;
2320 PetscScalar *va,*vv;
2321 Vec vB,vA;
2322 const PetscScalar *vb;
2323
2324 PetscFunctionBegin;
2325 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2326 ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2327
2328 ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2329 if (idx) {
2330 for (i=0; i<m; i++) {
2331 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2332 }
2333 }
2334
2335 ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2336 ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2337 ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2338
2339 ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2340 ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2341 for (i=0; i<m; i++) {
2342 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2343 vv[i] = vb[i];
2344 if (idx) idx[i] = a->garray[idxb[i]];
2345 } else {
2346 vv[i] = va[i];
2347 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2348 idx[i] = a->garray[idxb[i]];
2349 }
2350 }
2351 ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr);
2352 ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2353 ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2354 ierr = PetscFree(idxb);CHKERRQ(ierr);
2355 ierr = VecDestroy(&vA);CHKERRQ(ierr);
2356 ierr = VecDestroy(&vB);CHKERRQ(ierr);
2357 PetscFunctionReturn(0);
2358 }
2359
MatGetRowMinAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])2360 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2361 {
2362 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2363 PetscInt m = A->rmap->n,n = A->cmap->n;
2364 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend;
2365 PetscInt *cmap = mat->garray;
2366 PetscInt *diagIdx, *offdiagIdx;
2367 Vec diagV, offdiagV;
2368 PetscScalar *a, *diagA, *offdiagA, *ba;
2369 PetscInt r,j,col,ncols,*bi,*bj;
2370 PetscErrorCode ierr;
2371 Mat B = mat->B;
2372 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
2373
2374 PetscFunctionBegin;
2375 /* When a process holds entire A and other processes have no entry */
2376 if (A->cmap->N == n) {
2377 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2378 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2379 ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2380 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2381 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2382 PetscFunctionReturn(0);
2383 } else if (n == 0) {
2384 if (m) {
2385 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2386 for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2387 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2388 }
2389 PetscFunctionReturn(0);
2390 }
2391
2392 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2393 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2394 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2395 ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2396
2397 /* Get offdiagIdx[] for implicit 0.0 */
2398 ba = b->a;
2399 bi = b->i;
2400 bj = b->j;
2401 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2402 for (r = 0; r < m; r++) {
2403 ncols = bi[r+1] - bi[r];
2404 if (ncols == A->cmap->N - n) { /* Brow is dense */
2405 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2406 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2407 offdiagA[r] = 0.0;
2408
2409 /* Find first hole in the cmap */
2410 for (j=0; j<ncols; j++) {
2411 col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2412 if (col > j && j < cstart) {
2413 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2414 break;
2415 } else if (col > j + n && j >= cstart) {
2416 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2417 break;
2418 }
2419 }
2420 if (j == ncols && ncols < A->cmap->N - n) {
2421 /* a hole is outside compressed Bcols */
2422 if (ncols == 0) {
2423 if (cstart) {
2424 offdiagIdx[r] = 0;
2425 } else offdiagIdx[r] = cend;
2426 } else { /* ncols > 0 */
2427 offdiagIdx[r] = cmap[ncols-1] + 1;
2428 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2429 }
2430 }
2431 }
2432
2433 for (j=0; j<ncols; j++) {
2434 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2435 ba++; bj++;
2436 }
2437 }
2438
2439 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2440 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2441 for (r = 0; r < m; ++r) {
2442 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2443 a[r] = diagA[r];
2444 if (idx) idx[r] = cstart + diagIdx[r];
2445 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2446 a[r] = diagA[r];
2447 if (idx) {
2448 if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2449 idx[r] = cstart + diagIdx[r];
2450 } else idx[r] = offdiagIdx[r];
2451 }
2452 } else {
2453 a[r] = offdiagA[r];
2454 if (idx) idx[r] = offdiagIdx[r];
2455 }
2456 }
2457 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2458 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2459 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2460 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2461 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2462 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2463 PetscFunctionReturn(0);
2464 }
2465
MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])2466 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2467 {
2468 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2469 PetscInt m = A->rmap->n,n = A->cmap->n;
2470 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend;
2471 PetscInt *cmap = mat->garray;
2472 PetscInt *diagIdx, *offdiagIdx;
2473 Vec diagV, offdiagV;
2474 PetscScalar *a, *diagA, *offdiagA, *ba;
2475 PetscInt r,j,col,ncols,*bi,*bj;
2476 PetscErrorCode ierr;
2477 Mat B = mat->B;
2478 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
2479
2480 PetscFunctionBegin;
2481 /* When a process holds entire A and other processes have no entry */
2482 if (A->cmap->N == n) {
2483 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2484 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2485 ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2486 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2487 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2488 PetscFunctionReturn(0);
2489 } else if (n == 0) {
2490 if (m) {
2491 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2492 for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2493 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2494 }
2495 PetscFunctionReturn(0);
2496 }
2497
2498 ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2499 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2500 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2501 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2502
2503 /* Get offdiagIdx[] for implicit 0.0 */
2504 ba = b->a;
2505 bi = b->i;
2506 bj = b->j;
2507 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2508 for (r = 0; r < m; r++) {
2509 ncols = bi[r+1] - bi[r];
2510 if (ncols == A->cmap->N - n) { /* Brow is dense */
2511 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2512 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2513 offdiagA[r] = 0.0;
2514
2515 /* Find first hole in the cmap */
2516 for (j=0; j<ncols; j++) {
2517 col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2518 if (col > j && j < cstart) {
2519 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2520 break;
2521 } else if (col > j + n && j >= cstart) {
2522 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2523 break;
2524 }
2525 }
2526 if (j == ncols && ncols < A->cmap->N - n) {
2527 /* a hole is outside compressed Bcols */
2528 if (ncols == 0) {
2529 if (cstart) {
2530 offdiagIdx[r] = 0;
2531 } else offdiagIdx[r] = cend;
2532 } else { /* ncols > 0 */
2533 offdiagIdx[r] = cmap[ncols-1] + 1;
2534 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2535 }
2536 }
2537 }
2538
2539 for (j=0; j<ncols; j++) {
2540 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2541 ba++; bj++;
2542 }
2543 }
2544
2545 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2546 ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2547 for (r = 0; r < m; ++r) {
2548 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2549 a[r] = diagA[r];
2550 if (idx) idx[r] = cstart + diagIdx[r];
2551 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2552 a[r] = diagA[r];
2553 if (idx) {
2554 if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2555 idx[r] = cstart + diagIdx[r];
2556 } else idx[r] = offdiagIdx[r];
2557 }
2558 } else {
2559 a[r] = offdiagA[r];
2560 if (idx) idx[r] = offdiagIdx[r];
2561 }
2562 }
2563 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2564 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2565 ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2566 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2567 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2568 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2569 PetscFunctionReturn(0);
2570 }
2571
MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])2572 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2573 {
2574 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
2575 PetscInt m = A->rmap->n,n = A->cmap->n;
2576 PetscInt cstart = A->cmap->rstart,cend = A->cmap->rend;
2577 PetscInt *cmap = mat->garray;
2578 PetscInt *diagIdx, *offdiagIdx;
2579 Vec diagV, offdiagV;
2580 PetscScalar *a, *diagA, *offdiagA, *ba;
2581 PetscInt r,j,col,ncols,*bi,*bj;
2582 PetscErrorCode ierr;
2583 Mat B = mat->B;
2584 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
2585
2586 PetscFunctionBegin;
2587 /* When a process holds entire A and other processes have no entry */
2588 if (A->cmap->N == n) {
2589 ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2590 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2591 ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2592 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2593 ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2594 PetscFunctionReturn(0);
2595 } else if (n == 0) {
2596 if (m) {
2597 ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2598 for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2599 ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2600 }
2601 PetscFunctionReturn(0);
2602 }
2603
2604 ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2605 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2606 ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2607 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2608
2609 /* Get offdiagIdx[] for implicit 0.0 */
2610 ba = b->a;
2611 bi = b->i;
2612 bj = b->j;
2613 ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2614 for (r = 0; r < m; r++) {
2615 ncols = bi[r+1] - bi[r];
2616 if (ncols == A->cmap->N - n) { /* Brow is dense */
2617 offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2618 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2619 offdiagA[r] = 0.0;
2620
2621 /* Find first hole in the cmap */
2622 for (j=0; j<ncols; j++) {
2623 col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2624 if (col > j && j < cstart) {
2625 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2626 break;
2627 } else if (col > j + n && j >= cstart) {
2628 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2629 break;
2630 }
2631 }
2632 if (j == ncols && ncols < A->cmap->N - n) {
2633 /* a hole is outside compressed Bcols */
2634 if (ncols == 0) {
2635 if (cstart) {
2636 offdiagIdx[r] = 0;
2637 } else offdiagIdx[r] = cend;
2638 } else { /* ncols > 0 */
2639 offdiagIdx[r] = cmap[ncols-1] + 1;
2640 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2641 }
2642 }
2643 }
2644
2645 for (j=0; j<ncols; j++) {
2646 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2647 ba++; bj++;
2648 }
2649 }
2650
2651 ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2652 ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2653 for (r = 0; r < m; ++r) {
2654 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2655 a[r] = diagA[r];
2656 if (idx) idx[r] = cstart + diagIdx[r];
2657 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2658 a[r] = diagA[r];
2659 if (idx) {
2660 if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2661 idx[r] = cstart + diagIdx[r];
2662 } else idx[r] = offdiagIdx[r];
2663 }
2664 } else {
2665 a[r] = offdiagA[r];
2666 if (idx) idx[r] = offdiagIdx[r];
2667 }
2668 }
2669 ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2670 ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2671 ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2672 ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2673 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2674 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2675 PetscFunctionReturn(0);
2676 }
2677
MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat * newmat)2678 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2679 {
2680 PetscErrorCode ierr;
2681 Mat *dummy;
2682
2683 PetscFunctionBegin;
2684 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2685 *newmat = *dummy;
2686 ierr = PetscFree(dummy);CHKERRQ(ierr);
2687 PetscFunctionReturn(0);
2688 }
2689
MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar ** values)2690 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2691 {
2692 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2693 PetscErrorCode ierr;
2694
2695 PetscFunctionBegin;
2696 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2697 A->factorerrortype = a->A->factorerrortype;
2698 PetscFunctionReturn(0);
2699 }
2700
MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)2701 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2702 {
2703 PetscErrorCode ierr;
2704 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data;
2705
2706 PetscFunctionBegin;
2707 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2708 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2709 if (x->assembled) {
2710 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2711 } else {
2712 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2713 }
2714 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2715 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2716 PetscFunctionReturn(0);
2717 }
2718
MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)2719 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2720 {
2721 PetscFunctionBegin;
2722 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2723 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2724 PetscFunctionReturn(0);
2725 }
2726
2727 /*@
2728 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2729
2730 Collective on Mat
2731
2732 Input Parameters:
2733 + A - the matrix
2734 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2735
2736 Level: advanced
2737
2738 @*/
MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)2739 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2740 {
2741 PetscErrorCode ierr;
2742
2743 PetscFunctionBegin;
2744 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2745 PetscFunctionReturn(0);
2746 }
2747
MatSetFromOptions_MPIAIJ(PetscOptionItems * PetscOptionsObject,Mat A)2748 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2749 {
2750 PetscErrorCode ierr;
2751 PetscBool sc = PETSC_FALSE,flg;
2752
2753 PetscFunctionBegin;
2754 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2755 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2756 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2757 if (flg) {
2758 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2759 }
2760 ierr = PetscOptionsTail();CHKERRQ(ierr);
2761 PetscFunctionReturn(0);
2762 }
2763
MatShift_MPIAIJ(Mat Y,PetscScalar a)2764 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2765 {
2766 PetscErrorCode ierr;
2767 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data;
2768 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data;
2769
2770 PetscFunctionBegin;
2771 if (!Y->preallocated) {
2772 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2773 } else if (!aij->nz) {
2774 PetscInt nonew = aij->nonew;
2775 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2776 aij->nonew = nonew;
2777 }
2778 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2779 PetscFunctionReturn(0);
2780 }
2781
MatMissingDiagonal_MPIAIJ(Mat A,PetscBool * missing,PetscInt * d)2782 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d)
2783 {
2784 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2785 PetscErrorCode ierr;
2786
2787 PetscFunctionBegin;
2788 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2789 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2790 if (d) {
2791 PetscInt rstart;
2792 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2793 *d += rstart;
2794
2795 }
2796 PetscFunctionReturn(0);
2797 }
2798
MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt * bsizes,PetscScalar * diag)2799 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2800 {
2801 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2802 PetscErrorCode ierr;
2803
2804 PetscFunctionBegin;
2805 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2806 PetscFunctionReturn(0);
2807 }
2808
2809 /* -------------------------------------------------------------------*/
2810 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2811 MatGetRow_MPIAIJ,
2812 MatRestoreRow_MPIAIJ,
2813 MatMult_MPIAIJ,
2814 /* 4*/ MatMultAdd_MPIAIJ,
2815 MatMultTranspose_MPIAIJ,
2816 MatMultTransposeAdd_MPIAIJ,
2817 NULL,
2818 NULL,
2819 NULL,
2820 /*10*/ NULL,
2821 NULL,
2822 NULL,
2823 MatSOR_MPIAIJ,
2824 MatTranspose_MPIAIJ,
2825 /*15*/ MatGetInfo_MPIAIJ,
2826 MatEqual_MPIAIJ,
2827 MatGetDiagonal_MPIAIJ,
2828 MatDiagonalScale_MPIAIJ,
2829 MatNorm_MPIAIJ,
2830 /*20*/ MatAssemblyBegin_MPIAIJ,
2831 MatAssemblyEnd_MPIAIJ,
2832 MatSetOption_MPIAIJ,
2833 MatZeroEntries_MPIAIJ,
2834 /*24*/ MatZeroRows_MPIAIJ,
2835 NULL,
2836 NULL,
2837 NULL,
2838 NULL,
2839 /*29*/ MatSetUp_MPIAIJ,
2840 NULL,
2841 NULL,
2842 MatGetDiagonalBlock_MPIAIJ,
2843 NULL,
2844 /*34*/ MatDuplicate_MPIAIJ,
2845 NULL,
2846 NULL,
2847 NULL,
2848 NULL,
2849 /*39*/ MatAXPY_MPIAIJ,
2850 MatCreateSubMatrices_MPIAIJ,
2851 MatIncreaseOverlap_MPIAIJ,
2852 MatGetValues_MPIAIJ,
2853 MatCopy_MPIAIJ,
2854 /*44*/ MatGetRowMax_MPIAIJ,
2855 MatScale_MPIAIJ,
2856 MatShift_MPIAIJ,
2857 MatDiagonalSet_MPIAIJ,
2858 MatZeroRowsColumns_MPIAIJ,
2859 /*49*/ MatSetRandom_MPIAIJ,
2860 NULL,
2861 NULL,
2862 NULL,
2863 NULL,
2864 /*54*/ MatFDColoringCreate_MPIXAIJ,
2865 NULL,
2866 MatSetUnfactored_MPIAIJ,
2867 MatPermute_MPIAIJ,
2868 NULL,
2869 /*59*/ MatCreateSubMatrix_MPIAIJ,
2870 MatDestroy_MPIAIJ,
2871 MatView_MPIAIJ,
2872 NULL,
2873 NULL,
2874 /*64*/ NULL,
2875 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2876 NULL,
2877 NULL,
2878 NULL,
2879 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2880 MatGetRowMinAbs_MPIAIJ,
2881 NULL,
2882 NULL,
2883 NULL,
2884 NULL,
2885 /*75*/ MatFDColoringApply_AIJ,
2886 MatSetFromOptions_MPIAIJ,
2887 NULL,
2888 NULL,
2889 MatFindZeroDiagonals_MPIAIJ,
2890 /*80*/ NULL,
2891 NULL,
2892 NULL,
2893 /*83*/ MatLoad_MPIAIJ,
2894 MatIsSymmetric_MPIAIJ,
2895 NULL,
2896 NULL,
2897 NULL,
2898 NULL,
2899 /*89*/ NULL,
2900 NULL,
2901 MatMatMultNumeric_MPIAIJ_MPIAIJ,
2902 NULL,
2903 NULL,
2904 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2905 NULL,
2906 NULL,
2907 NULL,
2908 MatBindToCPU_MPIAIJ,
2909 /*99*/ MatProductSetFromOptions_MPIAIJ,
2910 NULL,
2911 NULL,
2912 MatConjugate_MPIAIJ,
2913 NULL,
2914 /*104*/MatSetValuesRow_MPIAIJ,
2915 MatRealPart_MPIAIJ,
2916 MatImaginaryPart_MPIAIJ,
2917 NULL,
2918 NULL,
2919 /*109*/NULL,
2920 NULL,
2921 MatGetRowMin_MPIAIJ,
2922 NULL,
2923 MatMissingDiagonal_MPIAIJ,
2924 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2925 NULL,
2926 MatGetGhosts_MPIAIJ,
2927 NULL,
2928 NULL,
2929 /*119*/MatMultDiagonalBlock_MPIAIJ,
2930 NULL,
2931 NULL,
2932 NULL,
2933 MatGetMultiProcBlock_MPIAIJ,
2934 /*124*/MatFindNonzeroRows_MPIAIJ,
2935 MatGetColumnNorms_MPIAIJ,
2936 MatInvertBlockDiagonal_MPIAIJ,
2937 MatInvertVariableBlockDiagonal_MPIAIJ,
2938 MatCreateSubMatricesMPI_MPIAIJ,
2939 /*129*/NULL,
2940 NULL,
2941 NULL,
2942 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2943 NULL,
2944 /*134*/NULL,
2945 NULL,
2946 NULL,
2947 NULL,
2948 NULL,
2949 /*139*/MatSetBlockSizes_MPIAIJ,
2950 NULL,
2951 NULL,
2952 MatFDColoringSetUp_MPIXAIJ,
2953 MatFindOffBlockDiagonalEntries_MPIAIJ,
2954 MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2955 /*145*/NULL,
2956 NULL,
2957 NULL
2958 };
2959
2960 /* ----------------------------------------------------------------------------------------*/
2961
MatStoreValues_MPIAIJ(Mat mat)2962 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2963 {
2964 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2965 PetscErrorCode ierr;
2966
2967 PetscFunctionBegin;
2968 ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2969 ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2970 PetscFunctionReturn(0);
2971 }
2972
MatRetrieveValues_MPIAIJ(Mat mat)2973 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2974 {
2975 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2976 PetscErrorCode ierr;
2977
2978 PetscFunctionBegin;
2979 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2980 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2981 PetscFunctionReturn(0);
2982 }
2983
MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])2984 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2985 {
2986 Mat_MPIAIJ *b;
2987 PetscErrorCode ierr;
2988 PetscMPIInt size;
2989
2990 PetscFunctionBegin;
2991 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2992 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2993 b = (Mat_MPIAIJ*)B->data;
2994
2995 #if defined(PETSC_USE_CTABLE)
2996 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2997 #else
2998 ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2999 #endif
3000 ierr = PetscFree(b->garray);CHKERRQ(ierr);
3001 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3002 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3003
3004 /* Because the B will have been resized we simply destroy it and create a new one each time */
3005 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3006 ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3007 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3008 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
3009 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3010 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3011 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3012
3013 if (!B->preallocated) {
3014 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3015 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3016 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3017 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3018 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3019 }
3020
3021 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3022 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3023 B->preallocated = PETSC_TRUE;
3024 B->was_assembled = PETSC_FALSE;
3025 B->assembled = PETSC_FALSE;
3026 PetscFunctionReturn(0);
3027 }
3028
MatResetPreallocation_MPIAIJ(Mat B)3029 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
3030 {
3031 Mat_MPIAIJ *b;
3032 PetscErrorCode ierr;
3033
3034 PetscFunctionBegin;
3035 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3036 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3037 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3038 b = (Mat_MPIAIJ*)B->data;
3039
3040 #if defined(PETSC_USE_CTABLE)
3041 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
3042 #else
3043 ierr = PetscFree(b->colmap);CHKERRQ(ierr);
3044 #endif
3045 ierr = PetscFree(b->garray);CHKERRQ(ierr);
3046 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
3047 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
3048
3049 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
3050 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
3051 B->preallocated = PETSC_TRUE;
3052 B->was_assembled = PETSC_FALSE;
3053 B->assembled = PETSC_FALSE;
3054 PetscFunctionReturn(0);
3055 }
3056
MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat * newmat)3057 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3058 {
3059 Mat mat;
3060 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3061 PetscErrorCode ierr;
3062
3063 PetscFunctionBegin;
3064 *newmat = NULL;
3065 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3066 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3067 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3068 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3069 a = (Mat_MPIAIJ*)mat->data;
3070
3071 mat->factortype = matin->factortype;
3072 mat->assembled = matin->assembled;
3073 mat->insertmode = NOT_SET_VALUES;
3074 mat->preallocated = matin->preallocated;
3075
3076 a->size = oldmat->size;
3077 a->rank = oldmat->rank;
3078 a->donotstash = oldmat->donotstash;
3079 a->roworiented = oldmat->roworiented;
3080 a->rowindices = NULL;
3081 a->rowvalues = NULL;
3082 a->getrowactive = PETSC_FALSE;
3083
3084 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3085 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3086
3087 if (oldmat->colmap) {
3088 #if defined(PETSC_USE_CTABLE)
3089 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3090 #else
3091 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
3092 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3093 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
3094 #endif
3095 } else a->colmap = NULL;
3096 if (oldmat->garray) {
3097 PetscInt len;
3098 len = oldmat->B->cmap->n;
3099 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
3100 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3101 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
3102 } else a->garray = NULL;
3103
3104 /* It may happen MatDuplicate is called with a non-assembled matrix
3105 In fact, MatDuplicate only requires the matrix to be preallocated
3106 This may happen inside a DMCreateMatrix_Shell */
3107 if (oldmat->lvec) {
3108 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3109 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3110 }
3111 if (oldmat->Mvctx) {
3112 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3113 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3114 }
3115 if (oldmat->Mvctx_mpi1) {
3116 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
3117 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
3118 }
3119
3120 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3121 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3122 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3123 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3124 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3125 *newmat = mat;
3126 PetscFunctionReturn(0);
3127 }
3128
MatLoad_MPIAIJ(Mat newMat,PetscViewer viewer)3129 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3130 {
3131 PetscBool isbinary, ishdf5;
3132 PetscErrorCode ierr;
3133
3134 PetscFunctionBegin;
3135 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
3136 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
3137 /* force binary viewer to load .info file if it has not yet done so */
3138 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3139 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3140 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
3141 if (isbinary) {
3142 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
3143 } else if (ishdf5) {
3144 #if defined(PETSC_HAVE_HDF5)
3145 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
3146 #else
3147 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3148 #endif
3149 } else {
3150 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3151 }
3152 PetscFunctionReturn(0);
3153 }
3154
MatLoad_MPIAIJ_Binary(Mat mat,PetscViewer viewer)3155 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3156 {
3157 PetscInt header[4],M,N,m,nz,rows,cols,sum,i;
3158 PetscInt *rowidxs,*colidxs;
3159 PetscScalar *matvals;
3160 PetscErrorCode ierr;
3161
3162 PetscFunctionBegin;
3163 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3164
3165 /* read in matrix header */
3166 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3167 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3168 M = header[1]; N = header[2]; nz = header[3];
3169 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3170 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3171 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3172
3173 /* set block sizes from the viewer's .info file */
3174 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3175 /* set global sizes if not set already */
3176 if (mat->rmap->N < 0) mat->rmap->N = M;
3177 if (mat->cmap->N < 0) mat->cmap->N = N;
3178 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3179 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3180
3181 /* check if the matrix sizes are correct */
3182 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3183 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3184
3185 /* read in row lengths and build row indices */
3186 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3187 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3188 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3189 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3190 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3191 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3192 /* read in column indices and matrix values */
3193 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3194 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3195 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3196 /* store matrix indices and values */
3197 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3198 ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3199 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3200 PetscFunctionReturn(0);
3201 }
3202
3203 /* Not scalable because of ISAllGather() unless getting all columns. */
ISGetSeqIS_Private(Mat mat,IS iscol,IS * isseq)3204 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3205 {
3206 PetscErrorCode ierr;
3207 IS iscol_local;
3208 PetscBool isstride;
3209 PetscMPIInt lisstride=0,gisstride;
3210
3211 PetscFunctionBegin;
3212 /* check if we are grabbing all columns*/
3213 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3214
3215 if (isstride) {
3216 PetscInt start,len,mstart,mlen;
3217 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3218 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3219 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3220 if (mstart == start && mlen-mstart == len) lisstride = 1;
3221 }
3222
3223 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3224 if (gisstride) {
3225 PetscInt N;
3226 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3227 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3228 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3229 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3230 } else {
3231 PetscInt cbs;
3232 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3233 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3234 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3235 }
3236
3237 *isseq = iscol_local;
3238 PetscFunctionReturn(0);
3239 }
3240
3241 /*
3242 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3243 (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3244
3245 Input Parameters:
3246 mat - matrix
3247 isrow - parallel row index set; its local indices are a subset of local columns of mat,
3248 i.e., mat->rstart <= isrow[i] < mat->rend
3249 iscol - parallel column index set; its local indices are a subset of local columns of mat,
3250 i.e., mat->cstart <= iscol[i] < mat->cend
3251 Output Parameter:
3252 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3253 iscol_o - sequential column index set for retrieving mat->B
3254 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3255 */
ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS * isrow_d,IS * iscol_d,IS * iscol_o,const PetscInt * garray[])3256 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3257 {
3258 PetscErrorCode ierr;
3259 Vec x,cmap;
3260 const PetscInt *is_idx;
3261 PetscScalar *xarray,*cmaparray;
3262 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count;
3263 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data;
3264 Mat B=a->B;
3265 Vec lvec=a->lvec,lcmap;
3266 PetscInt i,cstart,cend,Bn=B->cmap->N;
3267 MPI_Comm comm;
3268 VecScatter Mvctx=a->Mvctx;
3269
3270 PetscFunctionBegin;
3271 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3272 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3273
3274 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3275 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3276 ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3277 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3278 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3279
3280 /* Get start indices */
3281 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3282 isstart -= ncols;
3283 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3284
3285 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3286 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3287 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3288 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3289 for (i=0; i<ncols; i++) {
3290 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i];
3291 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */
3292 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */
3293 }
3294 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3295 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3296 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3297
3298 /* Get iscol_d */
3299 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3300 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3301 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3302
3303 /* Get isrow_d */
3304 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3305 rstart = mat->rmap->rstart;
3306 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3307 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3308 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3309 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3310
3311 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3312 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3313 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3314
3315 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3316 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3317 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3318
3319 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3320
3321 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3322 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3323
3324 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3325 /* off-process column indices */
3326 count = 0;
3327 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3328 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3329
3330 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3331 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3332 for (i=0; i<Bn; i++) {
3333 if (PetscRealPart(xarray[i]) > -1.0) {
3334 idx[count] = i; /* local column index in off-diagonal part B */
3335 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3336 count++;
3337 }
3338 }
3339 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3340 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3341
3342 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3343 /* cannot ensure iscol_o has same blocksize as iscol! */
3344
3345 ierr = PetscFree(idx);CHKERRQ(ierr);
3346 *garray = cmap1;
3347
3348 ierr = VecDestroy(&x);CHKERRQ(ierr);
3349 ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3350 ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3351 PetscFunctionReturn(0);
3352 }
3353
3354 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat * submat)3355 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3356 {
3357 PetscErrorCode ierr;
3358 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub;
3359 Mat M = NULL;
3360 MPI_Comm comm;
3361 IS iscol_d,isrow_d,iscol_o;
3362 Mat Asub = NULL,Bsub = NULL;
3363 PetscInt n;
3364
3365 PetscFunctionBegin;
3366 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3367
3368 if (call == MAT_REUSE_MATRIX) {
3369 /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3370 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3371 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3372
3373 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3374 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3375
3376 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3377 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3378
3379 /* Update diagonal and off-diagonal portions of submat */
3380 asub = (Mat_MPIAIJ*)(*submat)->data;
3381 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3382 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3383 if (n) {
3384 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3385 }
3386 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3387 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3388
3389 } else { /* call == MAT_INITIAL_MATRIX) */
3390 const PetscInt *garray;
3391 PetscInt BsubN;
3392
3393 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3394 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3395
3396 /* Create local submatrices Asub and Bsub */
3397 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3398 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3399
3400 /* Create submatrix M */
3401 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3402
3403 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3404 asub = (Mat_MPIAIJ*)M->data;
3405
3406 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3407 n = asub->B->cmap->N;
3408 if (BsubN > n) {
3409 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3410 const PetscInt *idx;
3411 PetscInt i,j,*idx_new,*subgarray = asub->garray;
3412 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3413
3414 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3415 j = 0;
3416 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3417 for (i=0; i<n; i++) {
3418 if (j >= BsubN) break;
3419 while (subgarray[i] > garray[j]) j++;
3420
3421 if (subgarray[i] == garray[j]) {
3422 idx_new[i] = idx[j++];
3423 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3424 }
3425 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3426
3427 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3428 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3429
3430 } else if (BsubN < n) {
3431 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3432 }
3433
3434 ierr = PetscFree(garray);CHKERRQ(ierr);
3435 *submat = M;
3436
3437 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3438 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3439 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3440
3441 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3442 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3443
3444 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3445 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3446 }
3447 PetscFunctionReturn(0);
3448 }
3449
MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat * newmat)3450 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3451 {
3452 PetscErrorCode ierr;
3453 IS iscol_local=NULL,isrow_d;
3454 PetscInt csize;
3455 PetscInt n,i,j,start,end;
3456 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3457 MPI_Comm comm;
3458
3459 PetscFunctionBegin;
3460 /* If isrow has same processor distribution as mat,
3461 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3462 if (call == MAT_REUSE_MATRIX) {
3463 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3464 if (isrow_d) {
3465 sameRowDist = PETSC_TRUE;
3466 tsameDist[1] = PETSC_TRUE; /* sameColDist */
3467 } else {
3468 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3469 if (iscol_local) {
3470 sameRowDist = PETSC_TRUE;
3471 tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3472 }
3473 }
3474 } else {
3475 /* Check if isrow has same processor distribution as mat */
3476 sameDist[0] = PETSC_FALSE;
3477 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3478 if (!n) {
3479 sameDist[0] = PETSC_TRUE;
3480 } else {
3481 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3482 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3483 if (i >= start && j < end) {
3484 sameDist[0] = PETSC_TRUE;
3485 }
3486 }
3487
3488 /* Check if iscol has same processor distribution as mat */
3489 sameDist[1] = PETSC_FALSE;
3490 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3491 if (!n) {
3492 sameDist[1] = PETSC_TRUE;
3493 } else {
3494 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3495 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3496 if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3497 }
3498
3499 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3500 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3501 sameRowDist = tsameDist[0];
3502 }
3503
3504 if (sameRowDist) {
3505 if (tsameDist[1]) { /* sameRowDist & sameColDist */
3506 /* isrow and iscol have same processor distribution as mat */
3507 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3508 PetscFunctionReturn(0);
3509 } else { /* sameRowDist */
3510 /* isrow has same processor distribution as mat */
3511 if (call == MAT_INITIAL_MATRIX) {
3512 PetscBool sorted;
3513 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3514 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3515 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3516 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3517
3518 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3519 if (sorted) {
3520 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3521 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3522 PetscFunctionReturn(0);
3523 }
3524 } else { /* call == MAT_REUSE_MATRIX */
3525 IS iscol_sub;
3526 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3527 if (iscol_sub) {
3528 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3529 PetscFunctionReturn(0);
3530 }
3531 }
3532 }
3533 }
3534
3535 /* General case: iscol -> iscol_local which has global size of iscol */
3536 if (call == MAT_REUSE_MATRIX) {
3537 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3538 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3539 } else {
3540 if (!iscol_local) {
3541 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3542 }
3543 }
3544
3545 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3546 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3547
3548 if (call == MAT_INITIAL_MATRIX) {
3549 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3550 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3551 }
3552 PetscFunctionReturn(0);
3553 }
3554
3555 /*@C
3556 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3557 and "off-diagonal" part of the matrix in CSR format.
3558
3559 Collective
3560
3561 Input Parameters:
3562 + comm - MPI communicator
3563 . A - "diagonal" portion of matrix
3564 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3565 - garray - global index of B columns
3566
3567 Output Parameter:
3568 . mat - the matrix, with input A as its local diagonal matrix
3569 Level: advanced
3570
3571 Notes:
3572 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3573 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3574
3575 .seealso: MatCreateMPIAIJWithSplitArrays()
3576 @*/
MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat * mat)3577 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3578 {
3579 PetscErrorCode ierr;
3580 Mat_MPIAIJ *maij;
3581 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew;
3582 PetscInt *oi=b->i,*oj=b->j,i,nz,col;
3583 PetscScalar *oa=b->a;
3584 Mat Bnew;
3585 PetscInt m,n,N;
3586
3587 PetscFunctionBegin;
3588 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3589 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3590 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3591 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3592 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3593 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3594
3595 /* Get global columns of mat */
3596 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3597
3598 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3599 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3600 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3601 maij = (Mat_MPIAIJ*)(*mat)->data;
3602
3603 (*mat)->preallocated = PETSC_TRUE;
3604
3605 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3606 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3607
3608 /* Set A as diagonal portion of *mat */
3609 maij->A = A;
3610
3611 nz = oi[m];
3612 for (i=0; i<nz; i++) {
3613 col = oj[i];
3614 oj[i] = garray[col];
3615 }
3616
3617 /* Set Bnew as off-diagonal portion of *mat */
3618 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3619 bnew = (Mat_SeqAIJ*)Bnew->data;
3620 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3621 maij->B = Bnew;
3622
3623 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3624
3625 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3626 b->free_a = PETSC_FALSE;
3627 b->free_ij = PETSC_FALSE;
3628 ierr = MatDestroy(&B);CHKERRQ(ierr);
3629
3630 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3631 bnew->free_a = PETSC_TRUE;
3632 bnew->free_ij = PETSC_TRUE;
3633
3634 /* condense columns of maij->B */
3635 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3636 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3639 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3640 PetscFunctionReturn(0);
3641 }
3642
3643 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3644
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat * newmat)3645 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3646 {
3647 PetscErrorCode ierr;
3648 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs;
3649 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3650 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data;
3651 Mat M,Msub,B=a->B;
3652 MatScalar *aa;
3653 Mat_SeqAIJ *aij;
3654 PetscInt *garray = a->garray,*colsub,Ncols;
3655 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3656 IS iscol_sub,iscmap;
3657 const PetscInt *is_idx,*cmap;
3658 PetscBool allcolumns=PETSC_FALSE;
3659 MPI_Comm comm;
3660
3661 PetscFunctionBegin;
3662 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3663
3664 if (call == MAT_REUSE_MATRIX) {
3665 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3666 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3667 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3668
3669 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3670 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3671
3672 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3673 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3674
3675 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3676
3677 } else { /* call == MAT_INITIAL_MATRIX) */
3678 PetscBool flg;
3679
3680 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3681 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3682
3683 /* (1) iscol -> nonscalable iscol_local */
3684 /* Check for special case: each processor gets entire matrix columns */
3685 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3686 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3687 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3688 if (allcolumns) {
3689 iscol_sub = iscol_local;
3690 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3691 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3692
3693 } else {
3694 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3695 PetscInt *idx,*cmap1,k;
3696 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3697 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3698 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3699 count = 0;
3700 k = 0;
3701 for (i=0; i<Ncols; i++) {
3702 j = is_idx[i];
3703 if (j >= cstart && j < cend) {
3704 /* diagonal part of mat */
3705 idx[count] = j;
3706 cmap1[count++] = i; /* column index in submat */
3707 } else if (Bn) {
3708 /* off-diagonal part of mat */
3709 if (j == garray[k]) {
3710 idx[count] = j;
3711 cmap1[count++] = i; /* column index in submat */
3712 } else if (j > garray[k]) {
3713 while (j > garray[k] && k < Bn-1) k++;
3714 if (j == garray[k]) {
3715 idx[count] = j;
3716 cmap1[count++] = i; /* column index in submat */
3717 }
3718 }
3719 }
3720 }
3721 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3722
3723 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3724 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3725 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3726
3727 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3728 }
3729
3730 /* (3) Create sequential Msub */
3731 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3732 }
3733
3734 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3735 aij = (Mat_SeqAIJ*)(Msub)->data;
3736 ii = aij->i;
3737 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3738
3739 /*
3740 m - number of local rows
3741 Ncols - number of columns (same on all processors)
3742 rstart - first row in new global matrix generated
3743 */
3744 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3745
3746 if (call == MAT_INITIAL_MATRIX) {
3747 /* (4) Create parallel newmat */
3748 PetscMPIInt rank,size;
3749 PetscInt csize;
3750
3751 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3752 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3753
3754 /*
3755 Determine the number of non-zeros in the diagonal and off-diagonal
3756 portions of the matrix in order to do correct preallocation
3757 */
3758
3759 /* first get start and end of "diagonal" columns */
3760 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3761 if (csize == PETSC_DECIDE) {
3762 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3763 if (mglobal == Ncols) { /* square matrix */
3764 nlocal = m;
3765 } else {
3766 nlocal = Ncols/size + ((Ncols % size) > rank);
3767 }
3768 } else {
3769 nlocal = csize;
3770 }
3771 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3772 rstart = rend - nlocal;
3773 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3774
3775 /* next, compute all the lengths */
3776 jj = aij->j;
3777 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3778 olens = dlens + m;
3779 for (i=0; i<m; i++) {
3780 jend = ii[i+1] - ii[i];
3781 olen = 0;
3782 dlen = 0;
3783 for (j=0; j<jend; j++) {
3784 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3785 else dlen++;
3786 jj++;
3787 }
3788 olens[i] = olen;
3789 dlens[i] = dlen;
3790 }
3791
3792 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3793 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3794
3795 ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3796 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3797 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3798 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3799 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3800 ierr = PetscFree(dlens);CHKERRQ(ierr);
3801
3802 } else { /* call == MAT_REUSE_MATRIX */
3803 M = *newmat;
3804 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3805 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3806 ierr = MatZeroEntries(M);CHKERRQ(ierr);
3807 /*
3808 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3809 rather than the slower MatSetValues().
3810 */
3811 M->was_assembled = PETSC_TRUE;
3812 M->assembled = PETSC_FALSE;
3813 }
3814
3815 /* (5) Set values of Msub to *newmat */
3816 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3817 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3818
3819 jj = aij->j;
3820 aa = aij->a;
3821 for (i=0; i<m; i++) {
3822 row = rstart + i;
3823 nz = ii[i+1] - ii[i];
3824 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3825 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3826 jj += nz; aa += nz;
3827 }
3828 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3829
3830 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3832
3833 ierr = PetscFree(colsub);CHKERRQ(ierr);
3834
3835 /* save Msub, iscol_sub and iscmap used in processor for next request */
3836 if (call == MAT_INITIAL_MATRIX) {
3837 *newmat = M;
3838 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3839 ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3840
3841 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3842 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3843
3844 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3845 ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3846
3847 if (iscol_local) {
3848 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3849 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3850 }
3851 }
3852 PetscFunctionReturn(0);
3853 }
3854
3855 /*
3856 Not great since it makes two copies of the submatrix, first an SeqAIJ
3857 in local and then by concatenating the local matrices the end result.
3858 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3859
3860 Note: This requires a sequential iscol with all indices.
3861 */
MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat * newmat)3862 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3863 {
3864 PetscErrorCode ierr;
3865 PetscMPIInt rank,size;
3866 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3867 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3868 Mat M,Mreuse;
3869 MatScalar *aa,*vwork;
3870 MPI_Comm comm;
3871 Mat_SeqAIJ *aij;
3872 PetscBool colflag,allcolumns=PETSC_FALSE;
3873
3874 PetscFunctionBegin;
3875 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3876 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3877 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3878
3879 /* Check for special case: each processor gets entire matrix columns */
3880 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3881 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3882 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3883 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3884
3885 if (call == MAT_REUSE_MATRIX) {
3886 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3887 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3888 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3889 } else {
3890 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3891 }
3892
3893 /*
3894 m - number of local rows
3895 n - number of columns (same on all processors)
3896 rstart - first row in new global matrix generated
3897 */
3898 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3899 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3900 if (call == MAT_INITIAL_MATRIX) {
3901 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3902 ii = aij->i;
3903 jj = aij->j;
3904
3905 /*
3906 Determine the number of non-zeros in the diagonal and off-diagonal
3907 portions of the matrix in order to do correct preallocation
3908 */
3909
3910 /* first get start and end of "diagonal" columns */
3911 if (csize == PETSC_DECIDE) {
3912 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3913 if (mglobal == n) { /* square matrix */
3914 nlocal = m;
3915 } else {
3916 nlocal = n/size + ((n % size) > rank);
3917 }
3918 } else {
3919 nlocal = csize;
3920 }
3921 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3922 rstart = rend - nlocal;
3923 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3924
3925 /* next, compute all the lengths */
3926 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3927 olens = dlens + m;
3928 for (i=0; i<m; i++) {
3929 jend = ii[i+1] - ii[i];
3930 olen = 0;
3931 dlen = 0;
3932 for (j=0; j<jend; j++) {
3933 if (*jj < rstart || *jj >= rend) olen++;
3934 else dlen++;
3935 jj++;
3936 }
3937 olens[i] = olen;
3938 dlens[i] = dlen;
3939 }
3940 ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3941 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3942 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3943 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3944 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3945 ierr = PetscFree(dlens);CHKERRQ(ierr);
3946 } else {
3947 PetscInt ml,nl;
3948
3949 M = *newmat;
3950 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3951 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3952 ierr = MatZeroEntries(M);CHKERRQ(ierr);
3953 /*
3954 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3955 rather than the slower MatSetValues().
3956 */
3957 M->was_assembled = PETSC_TRUE;
3958 M->assembled = PETSC_FALSE;
3959 }
3960 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3961 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3962 ii = aij->i;
3963 jj = aij->j;
3964 aa = aij->a;
3965 for (i=0; i<m; i++) {
3966 row = rstart + i;
3967 nz = ii[i+1] - ii[i];
3968 cwork = jj; jj += nz;
3969 vwork = aa; aa += nz;
3970 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3971 }
3972
3973 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3974 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975 *newmat = M;
3976
3977 /* save submatrix used in processor for next request */
3978 if (call == MAT_INITIAL_MATRIX) {
3979 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3980 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3981 }
3982 PetscFunctionReturn(0);
3983 }
3984
MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])3985 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3986 {
3987 PetscInt m,cstart, cend,j,nnz,i,d;
3988 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3989 const PetscInt *JJ;
3990 PetscErrorCode ierr;
3991 PetscBool nooffprocentries;
3992
3993 PetscFunctionBegin;
3994 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3995
3996 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3997 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3998 m = B->rmap->n;
3999 cstart = B->cmap->rstart;
4000 cend = B->cmap->rend;
4001 rstart = B->rmap->rstart;
4002
4003 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
4004
4005 if (PetscDefined(USE_DEBUG)) {
4006 for (i=0; i<m; i++) {
4007 nnz = Ii[i+1]- Ii[i];
4008 JJ = J + Ii[i];
4009 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
4010 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]);
4011 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4012 }
4013 }
4014
4015 for (i=0; i<m; i++) {
4016 nnz = Ii[i+1]- Ii[i];
4017 JJ = J + Ii[i];
4018 nnz_max = PetscMax(nnz_max,nnz);
4019 d = 0;
4020 for (j=0; j<nnz; j++) {
4021 if (cstart <= JJ[j] && JJ[j] < cend) d++;
4022 }
4023 d_nnz[i] = d;
4024 o_nnz[i] = nnz - d;
4025 }
4026 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4027 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4028
4029 for (i=0; i<m; i++) {
4030 ii = i + rstart;
4031 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
4032 }
4033 nooffprocentries = B->nooffprocentries;
4034 B->nooffprocentries = PETSC_TRUE;
4035 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4036 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4037 B->nooffprocentries = nooffprocentries;
4038
4039 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4040 PetscFunctionReturn(0);
4041 }
4042
4043 /*@
4044 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4045 (the default parallel PETSc format).
4046
4047 Collective
4048
4049 Input Parameters:
4050 + B - the matrix
4051 . i - the indices into j for the start of each local row (starts with zero)
4052 . j - the column indices for each local row (starts with zero)
4053 - v - optional values in the matrix
4054
4055 Level: developer
4056
4057 Notes:
4058 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
4059 thus you CANNOT change the matrix entries by changing the values of v[] after you have
4060 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4061
4062 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4063
4064 The format which is used for the sparse matrix input, is equivalent to a
4065 row-major ordering.. i.e for the following matrix, the input data expected is
4066 as shown
4067
4068 $ 1 0 0
4069 $ 2 0 3 P0
4070 $ -------
4071 $ 4 5 6 P1
4072 $
4073 $ Process0 [P0]: rows_owned=[0,1]
4074 $ i = {0,1,3} [size = nrow+1 = 2+1]
4075 $ j = {0,0,2} [size = 3]
4076 $ v = {1,2,3} [size = 3]
4077 $
4078 $ Process1 [P1]: rows_owned=[2]
4079 $ i = {0,3} [size = nrow+1 = 1+1]
4080 $ j = {0,1,2} [size = 3]
4081 $ v = {4,5,6} [size = 3]
4082
4083 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
4084 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4085 @*/
MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])4086 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4087 {
4088 PetscErrorCode ierr;
4089
4090 PetscFunctionBegin;
4091 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4092 PetscFunctionReturn(0);
4093 }
4094
4095 /*@C
4096 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4097 (the default parallel PETSc format). For good matrix assembly performance
4098 the user should preallocate the matrix storage by setting the parameters
4099 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4100 performance can be increased by more than a factor of 50.
4101
4102 Collective
4103
4104 Input Parameters:
4105 + B - the matrix
4106 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4107 (same value is used for all local rows)
4108 . d_nnz - array containing the number of nonzeros in the various rows of the
4109 DIAGONAL portion of the local submatrix (possibly different for each row)
4110 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
4111 The size of this array is equal to the number of local rows, i.e 'm'.
4112 For matrices that will be factored, you must leave room for (and set)
4113 the diagonal entry even if it is zero.
4114 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4115 submatrix (same value is used for all local rows).
4116 - o_nnz - array containing the number of nonzeros in the various rows of the
4117 OFF-DIAGONAL portion of the local submatrix (possibly different for
4118 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
4119 structure. The size of this array is equal to the number
4120 of local rows, i.e 'm'.
4121
4122 If the *_nnz parameter is given then the *_nz parameter is ignored
4123
4124 The AIJ format (also called the Yale sparse matrix format or
4125 compressed row storage (CSR)), is fully compatible with standard Fortran 77
4126 storage. The stored row and column indices begin with zero.
4127 See Users-Manual: ch_mat for details.
4128
4129 The parallel matrix is partitioned such that the first m0 rows belong to
4130 process 0, the next m1 rows belong to process 1, the next m2 rows belong
4131 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4132
4133 The DIAGONAL portion of the local submatrix of a processor can be defined
4134 as the submatrix which is obtained by extraction the part corresponding to
4135 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4136 first row that belongs to the processor, r2 is the last row belonging to
4137 the this processor, and c1-c2 is range of indices of the local part of a
4138 vector suitable for applying the matrix to. This is an mxn matrix. In the
4139 common case of a square matrix, the row and column ranges are the same and
4140 the DIAGONAL part is also square. The remaining portion of the local
4141 submatrix (mxN) constitute the OFF-DIAGONAL portion.
4142
4143 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4144
4145 You can call MatGetInfo() to get information on how effective the preallocation was;
4146 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4147 You can also run with the option -info and look for messages with the string
4148 malloc in them to see if additional memory allocation was needed.
4149
4150 Example usage:
4151
4152 Consider the following 8x8 matrix with 34 non-zero values, that is
4153 assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4154 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4155 as follows:
4156
4157 .vb
4158 1 2 0 | 0 3 0 | 0 4
4159 Proc0 0 5 6 | 7 0 0 | 8 0
4160 9 0 10 | 11 0 0 | 12 0
4161 -------------------------------------
4162 13 0 14 | 15 16 17 | 0 0
4163 Proc1 0 18 0 | 19 20 21 | 0 0
4164 0 0 0 | 22 23 0 | 24 0
4165 -------------------------------------
4166 Proc2 25 26 27 | 0 0 28 | 29 0
4167 30 0 0 | 31 32 33 | 0 34
4168 .ve
4169
4170 This can be represented as a collection of submatrices as:
4171
4172 .vb
4173 A B C
4174 D E F
4175 G H I
4176 .ve
4177
4178 Where the submatrices A,B,C are owned by proc0, D,E,F are
4179 owned by proc1, G,H,I are owned by proc2.
4180
4181 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4182 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4183 The 'M','N' parameters are 8,8, and have the same values on all procs.
4184
4185 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4186 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4187 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4188 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4189 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4190 matrix, ans [DF] as another SeqAIJ matrix.
4191
4192 When d_nz, o_nz parameters are specified, d_nz storage elements are
4193 allocated for every row of the local diagonal submatrix, and o_nz
4194 storage locations are allocated for every row of the OFF-DIAGONAL submat.
4195 One way to choose d_nz and o_nz is to use the max nonzerors per local
4196 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4197 In this case, the values of d_nz,o_nz are:
4198 .vb
4199 proc0 : dnz = 2, o_nz = 2
4200 proc1 : dnz = 3, o_nz = 2
4201 proc2 : dnz = 1, o_nz = 4
4202 .ve
4203 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4204 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4205 for proc3. i.e we are using 12+15+10=37 storage locations to store
4206 34 values.
4207
4208 When d_nnz, o_nnz parameters are specified, the storage is specified
4209 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4210 In the above case the values for d_nnz,o_nnz are:
4211 .vb
4212 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4213 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4214 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4215 .ve
4216 Here the space allocated is sum of all the above values i.e 34, and
4217 hence pre-allocation is perfect.
4218
4219 Level: intermediate
4220
4221 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4222 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4223 @*/
MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])4224 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4225 {
4226 PetscErrorCode ierr;
4227
4228 PetscFunctionBegin;
4229 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4230 PetscValidType(B,1);
4231 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4232 PetscFunctionReturn(0);
4233 }
4234
4235 /*@
4236 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4237 CSR format for the local rows.
4238
4239 Collective
4240
4241 Input Parameters:
4242 + comm - MPI communicator
4243 . m - number of local rows (Cannot be PETSC_DECIDE)
4244 . n - This value should be the same as the local size used in creating the
4245 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4246 calculated if N is given) For square matrices n is almost always m.
4247 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4248 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4249 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4250 . j - column indices
4251 - a - matrix values
4252
4253 Output Parameter:
4254 . mat - the matrix
4255
4256 Level: intermediate
4257
4258 Notes:
4259 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4260 thus you CANNOT change the matrix entries by changing the values of a[] after you have
4261 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4262
4263 The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4264
4265 The format which is used for the sparse matrix input, is equivalent to a
4266 row-major ordering.. i.e for the following matrix, the input data expected is
4267 as shown
4268
4269 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4270
4271 $ 1 0 0
4272 $ 2 0 3 P0
4273 $ -------
4274 $ 4 5 6 P1
4275 $
4276 $ Process0 [P0]: rows_owned=[0,1]
4277 $ i = {0,1,3} [size = nrow+1 = 2+1]
4278 $ j = {0,0,2} [size = 3]
4279 $ v = {1,2,3} [size = 3]
4280 $
4281 $ Process1 [P1]: rows_owned=[2]
4282 $ i = {0,3} [size = nrow+1 = 1+1]
4283 $ j = {0,1,2} [size = 3]
4284 $ v = {4,5,6} [size = 3]
4285
4286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4287 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4288 @*/
MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat * mat)4289 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4290 {
4291 PetscErrorCode ierr;
4292
4293 PetscFunctionBegin;
4294 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4295 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4296 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4297 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4298 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4299 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4300 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4301 PetscFunctionReturn(0);
4302 }
4303
4304 /*@
4305 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard
4306 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical
4307
4308 Collective
4309
4310 Input Parameters:
4311 + mat - the matrix
4312 . m - number of local rows (Cannot be PETSC_DECIDE)
4313 . n - This value should be the same as the local size used in creating the
4314 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4315 calculated if N is given) For square matrices n is almost always m.
4316 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4317 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4318 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4319 . J - column indices
4320 - v - matrix values
4321
4322 Level: intermediate
4323
4324 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4325 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4326 @*/
MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])4327 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4328 {
4329 PetscErrorCode ierr;
4330 PetscInt cstart,nnz,i,j;
4331 PetscInt *ld;
4332 PetscBool nooffprocentries;
4333 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data;
4334 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
4335 PetscScalar *ad = Ad->a, *ao = Ao->a;
4336 const PetscInt *Adi = Ad->i;
4337 PetscInt ldi,Iii,md;
4338
4339 PetscFunctionBegin;
4340 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4341 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4342 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4343 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4344
4345 cstart = mat->cmap->rstart;
4346 if (!Aij->ld) {
4347 /* count number of entries below block diagonal */
4348 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4349 Aij->ld = ld;
4350 for (i=0; i<m; i++) {
4351 nnz = Ii[i+1]- Ii[i];
4352 j = 0;
4353 while (J[j] < cstart && j < nnz) {j++;}
4354 J += nnz;
4355 ld[i] = j;
4356 }
4357 } else {
4358 ld = Aij->ld;
4359 }
4360
4361 for (i=0; i<m; i++) {
4362 nnz = Ii[i+1]- Ii[i];
4363 Iii = Ii[i];
4364 ldi = ld[i];
4365 md = Adi[i+1]-Adi[i];
4366 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4367 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4368 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4369 ad += md;
4370 ao += nnz - md;
4371 }
4372 nooffprocentries = mat->nooffprocentries;
4373 mat->nooffprocentries = PETSC_TRUE;
4374 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4375 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4376 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4377 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4379 mat->nooffprocentries = nooffprocentries;
4380 PetscFunctionReturn(0);
4381 }
4382
4383 /*@C
4384 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4385 (the default parallel PETSc format). For good matrix assembly performance
4386 the user should preallocate the matrix storage by setting the parameters
4387 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4388 performance can be increased by more than a factor of 50.
4389
4390 Collective
4391
4392 Input Parameters:
4393 + comm - MPI communicator
4394 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4395 This value should be the same as the local size used in creating the
4396 y vector for the matrix-vector product y = Ax.
4397 . n - This value should be the same as the local size used in creating the
4398 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4399 calculated if N is given) For square matrices n is almost always m.
4400 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4401 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4402 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4403 (same value is used for all local rows)
4404 . d_nnz - array containing the number of nonzeros in the various rows of the
4405 DIAGONAL portion of the local submatrix (possibly different for each row)
4406 or NULL, if d_nz is used to specify the nonzero structure.
4407 The size of this array is equal to the number of local rows, i.e 'm'.
4408 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4409 submatrix (same value is used for all local rows).
4410 - o_nnz - array containing the number of nonzeros in the various rows of the
4411 OFF-DIAGONAL portion of the local submatrix (possibly different for
4412 each row) or NULL, if o_nz is used to specify the nonzero
4413 structure. The size of this array is equal to the number
4414 of local rows, i.e 'm'.
4415
4416 Output Parameter:
4417 . A - the matrix
4418
4419 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4420 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4421 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4422
4423 Notes:
4424 If the *_nnz parameter is given then the *_nz parameter is ignored
4425
4426 m,n,M,N parameters specify the size of the matrix, and its partitioning across
4427 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4428 storage requirements for this matrix.
4429
4430 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4431 processor than it must be used on all processors that share the object for
4432 that argument.
4433
4434 The user MUST specify either the local or global matrix dimensions
4435 (possibly both).
4436
4437 The parallel matrix is partitioned across processors such that the
4438 first m0 rows belong to process 0, the next m1 rows belong to
4439 process 1, the next m2 rows belong to process 2 etc.. where
4440 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4441 values corresponding to [m x N] submatrix.
4442
4443 The columns are logically partitioned with the n0 columns belonging
4444 to 0th partition, the next n1 columns belonging to the next
4445 partition etc.. where n0,n1,n2... are the input parameter 'n'.
4446
4447 The DIAGONAL portion of the local submatrix on any given processor
4448 is the submatrix corresponding to the rows and columns m,n
4449 corresponding to the given processor. i.e diagonal matrix on
4450 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4451 etc. The remaining portion of the local submatrix [m x (N-n)]
4452 constitute the OFF-DIAGONAL portion. The example below better
4453 illustrates this concept.
4454
4455 For a square global matrix we define each processor's diagonal portion
4456 to be its local rows and the corresponding columns (a square submatrix);
4457 each processor's off-diagonal portion encompasses the remainder of the
4458 local matrix (a rectangular submatrix).
4459
4460 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4461
4462 When calling this routine with a single process communicator, a matrix of
4463 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4464 type of communicator, use the construction mechanism
4465 .vb
4466 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4467 .ve
4468
4469 $ MatCreate(...,&A);
4470 $ MatSetType(A,MATMPIAIJ);
4471 $ MatSetSizes(A, m,n,M,N);
4472 $ MatMPIAIJSetPreallocation(A,...);
4473
4474 By default, this format uses inodes (identical nodes) when possible.
4475 We search for consecutive rows with the same nonzero structure, thereby
4476 reusing matrix information to achieve increased efficiency.
4477
4478 Options Database Keys:
4479 + -mat_no_inode - Do not use inodes
4480 - -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4481
4482
4483
4484 Example usage:
4485
4486 Consider the following 8x8 matrix with 34 non-zero values, that is
4487 assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4488 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4489 as follows
4490
4491 .vb
4492 1 2 0 | 0 3 0 | 0 4
4493 Proc0 0 5 6 | 7 0 0 | 8 0
4494 9 0 10 | 11 0 0 | 12 0
4495 -------------------------------------
4496 13 0 14 | 15 16 17 | 0 0
4497 Proc1 0 18 0 | 19 20 21 | 0 0
4498 0 0 0 | 22 23 0 | 24 0
4499 -------------------------------------
4500 Proc2 25 26 27 | 0 0 28 | 29 0
4501 30 0 0 | 31 32 33 | 0 34
4502 .ve
4503
4504 This can be represented as a collection of submatrices as
4505
4506 .vb
4507 A B C
4508 D E F
4509 G H I
4510 .ve
4511
4512 Where the submatrices A,B,C are owned by proc0, D,E,F are
4513 owned by proc1, G,H,I are owned by proc2.
4514
4515 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4516 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4517 The 'M','N' parameters are 8,8, and have the same values on all procs.
4518
4519 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4520 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4521 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4522 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4523 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4524 matrix, ans [DF] as another SeqAIJ matrix.
4525
4526 When d_nz, o_nz parameters are specified, d_nz storage elements are
4527 allocated for every row of the local diagonal submatrix, and o_nz
4528 storage locations are allocated for every row of the OFF-DIAGONAL submat.
4529 One way to choose d_nz and o_nz is to use the max nonzerors per local
4530 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4531 In this case, the values of d_nz,o_nz are
4532 .vb
4533 proc0 : dnz = 2, o_nz = 2
4534 proc1 : dnz = 3, o_nz = 2
4535 proc2 : dnz = 1, o_nz = 4
4536 .ve
4537 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4538 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4539 for proc3. i.e we are using 12+15+10=37 storage locations to store
4540 34 values.
4541
4542 When d_nnz, o_nnz parameters are specified, the storage is specified
4543 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4544 In the above case the values for d_nnz,o_nnz are
4545 .vb
4546 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4547 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4548 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4549 .ve
4550 Here the space allocated is sum of all the above values i.e 34, and
4551 hence pre-allocation is perfect.
4552
4553 Level: intermediate
4554
4555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4556 MATMPIAIJ, MatCreateMPIAIJWithArrays()
4557 @*/
MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat * A)4558 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4559 {
4560 PetscErrorCode ierr;
4561 PetscMPIInt size;
4562
4563 PetscFunctionBegin;
4564 ierr = MatCreate(comm,A);CHKERRQ(ierr);
4565 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4566 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4567 if (size > 1) {
4568 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4569 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4570 } else {
4571 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4572 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4573 }
4574 PetscFunctionReturn(0);
4575 }
4576
4577 /*@C
4578 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4579
4580 Not collective
4581
4582 Input Parameter:
4583 . A - The MPIAIJ matrix
4584
4585 Output Parameters:
4586 + Ad - The local diagonal block as a SeqAIJ matrix
4587 . Ao - The local off-diagonal block as a SeqAIJ matrix
4588 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4589
4590 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4591 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
4592 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4593 local column numbers to global column numbers in the original matrix.
4594
4595 Level: intermediate
4596
4597 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4598 @*/
MatMPIAIJGetSeqAIJ(Mat A,Mat * Ad,Mat * Ao,const PetscInt * colmap[])4599 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4600 {
4601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4602 PetscBool flg;
4603 PetscErrorCode ierr;
4604
4605 PetscFunctionBegin;
4606 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4607 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4608 if (Ad) *Ad = a->A;
4609 if (Ao) *Ao = a->B;
4610 if (colmap) *colmap = a->garray;
4611 PetscFunctionReturn(0);
4612 }
4613
MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)4614 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4615 {
4616 PetscErrorCode ierr;
4617 PetscInt m,N,i,rstart,nnz,Ii;
4618 PetscInt *indx;
4619 PetscScalar *values;
4620
4621 PetscFunctionBegin;
4622 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4623 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4624 PetscInt *dnz,*onz,sum,bs,cbs;
4625
4626 if (n == PETSC_DECIDE) {
4627 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4628 }
4629 /* Check sum(n) = N */
4630 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4631 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4632
4633 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4634 rstart -= m;
4635
4636 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4637 for (i=0; i<m; i++) {
4638 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4639 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4640 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4641 }
4642
4643 ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4644 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4645 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4646 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4647 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4648 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4649 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4650 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4651 }
4652
4653 /* numeric phase */
4654 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4655 for (i=0; i<m; i++) {
4656 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4657 Ii = i + rstart;
4658 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4659 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4660 }
4661 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4662 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4663 PetscFunctionReturn(0);
4664 }
4665
MatFileSplit(Mat A,char * outfile)4666 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4667 {
4668 PetscErrorCode ierr;
4669 PetscMPIInt rank;
4670 PetscInt m,N,i,rstart,nnz;
4671 size_t len;
4672 const PetscInt *indx;
4673 PetscViewer out;
4674 char *name;
4675 Mat B;
4676 const PetscScalar *values;
4677
4678 PetscFunctionBegin;
4679 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4680 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4681 /* Should this be the type of the diagonal block of A? */
4682 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4683 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4684 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4685 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4686 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4687 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4688 for (i=0; i<m; i++) {
4689 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4690 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4691 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4692 }
4693 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4694 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4695
4696 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4697 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4698 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4699 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4700 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4701 ierr = PetscFree(name);CHKERRQ(ierr);
4702 ierr = MatView(B,out);CHKERRQ(ierr);
4703 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4704 ierr = MatDestroy(&B);CHKERRQ(ierr);
4705 PetscFunctionReturn(0);
4706 }
4707
MatDestroy_MPIAIJ_SeqsToMPI(void * data)4708 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4709 {
4710 PetscErrorCode ierr;
4711 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4712
4713 PetscFunctionBegin;
4714 if (!merge) PetscFunctionReturn(0);
4715 ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4716 ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4717 ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4718 ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4719 ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4720 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4721 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4722 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4723 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4724 ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4725 ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4726 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4727 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4728 ierr = PetscFree(merge);CHKERRQ(ierr);
4729 PetscFunctionReturn(0);
4730 }
4731
4732 #include <../src/mat/utils/freespace.h>
4733 #include <petscbt.h>
4734
MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)4735 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4736 {
4737 PetscErrorCode ierr;
4738 MPI_Comm comm;
4739 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data;
4740 PetscMPIInt size,rank,taga,*len_s;
4741 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4742 PetscInt proc,m;
4743 PetscInt **buf_ri,**buf_rj;
4744 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4745 PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
4746 MPI_Request *s_waits,*r_waits;
4747 MPI_Status *status;
4748 MatScalar *aa=a->a;
4749 MatScalar **abuf_r,*ba_i;
4750 Mat_Merge_SeqsToMPI *merge;
4751 PetscContainer container;
4752
4753 PetscFunctionBegin;
4754 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4755 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4756
4757 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4758 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4759
4760 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4761 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4762 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4763
4764 bi = merge->bi;
4765 bj = merge->bj;
4766 buf_ri = merge->buf_ri;
4767 buf_rj = merge->buf_rj;
4768
4769 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4770 owners = merge->rowmap->range;
4771 len_s = merge->len_s;
4772
4773 /* send and recv matrix values */
4774 /*-----------------------------*/
4775 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4776 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4777
4778 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4779 for (proc=0,k=0; proc<size; proc++) {
4780 if (!len_s[proc]) continue;
4781 i = owners[proc];
4782 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4783 k++;
4784 }
4785
4786 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4787 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4788 ierr = PetscFree(status);CHKERRQ(ierr);
4789
4790 ierr = PetscFree(s_waits);CHKERRQ(ierr);
4791 ierr = PetscFree(r_waits);CHKERRQ(ierr);
4792
4793 /* insert mat values of mpimat */
4794 /*----------------------------*/
4795 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4796 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4797
4798 for (k=0; k<merge->nrecv; k++) {
4799 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4800 nrows = *(buf_ri_k[k]);
4801 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */
4802 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */
4803 }
4804
4805 /* set values of ba */
4806 m = merge->rowmap->n;
4807 for (i=0; i<m; i++) {
4808 arow = owners[rank] + i;
4809 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
4810 bnzi = bi[i+1] - bi[i];
4811 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4812
4813 /* add local non-zero vals of this proc's seqmat into ba */
4814 anzi = ai[arow+1] - ai[arow];
4815 aj = a->j + ai[arow];
4816 aa = a->a + ai[arow];
4817 nextaj = 0;
4818 for (j=0; nextaj<anzi; j++) {
4819 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4820 ba_i[j] += aa[nextaj++];
4821 }
4822 }
4823
4824 /* add received vals into ba */
4825 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4826 /* i-th row */
4827 if (i == *nextrow[k]) {
4828 anzi = *(nextai[k]+1) - *nextai[k];
4829 aj = buf_rj[k] + *(nextai[k]);
4830 aa = abuf_r[k] + *(nextai[k]);
4831 nextaj = 0;
4832 for (j=0; nextaj<anzi; j++) {
4833 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4834 ba_i[j] += aa[nextaj++];
4835 }
4836 }
4837 nextrow[k]++; nextai[k]++;
4838 }
4839 }
4840 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4841 }
4842 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4843 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4844
4845 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4846 ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4847 ierr = PetscFree(ba_i);CHKERRQ(ierr);
4848 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4849 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4850 PetscFunctionReturn(0);
4851 }
4852
MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat * mpimat)4853 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4854 {
4855 PetscErrorCode ierr;
4856 Mat B_mpi;
4857 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
4858 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4859 PetscInt **buf_rj,**buf_ri,**buf_ri_k;
4860 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4861 PetscInt len,proc,*dnz,*onz,bs,cbs;
4862 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4863 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4864 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
4865 MPI_Status *status;
4866 PetscFreeSpaceList free_space=NULL,current_space=NULL;
4867 PetscBT lnkbt;
4868 Mat_Merge_SeqsToMPI *merge;
4869 PetscContainer container;
4870
4871 PetscFunctionBegin;
4872 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4873
4874 /* make sure it is a PETSc comm */
4875 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4876 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4877 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4878
4879 ierr = PetscNew(&merge);CHKERRQ(ierr);
4880 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4881
4882 /* determine row ownership */
4883 /*---------------------------------------------------------*/
4884 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4885 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4886 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4887 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4888 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4889 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4890 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4891
4892 m = merge->rowmap->n;
4893 owners = merge->rowmap->range;
4894
4895 /* determine the number of messages to send, their lengths */
4896 /*---------------------------------------------------------*/
4897 len_s = merge->len_s;
4898
4899 len = 0; /* length of buf_si[] */
4900 merge->nsend = 0;
4901 for (proc=0; proc<size; proc++) {
4902 len_si[proc] = 0;
4903 if (proc == rank) {
4904 len_s[proc] = 0;
4905 } else {
4906 len_si[proc] = owners[proc+1] - owners[proc] + 1;
4907 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4908 }
4909 if (len_s[proc]) {
4910 merge->nsend++;
4911 nrows = 0;
4912 for (i=owners[proc]; i<owners[proc+1]; i++) {
4913 if (ai[i+1] > ai[i]) nrows++;
4914 }
4915 len_si[proc] = 2*(nrows+1);
4916 len += len_si[proc];
4917 }
4918 }
4919
4920 /* determine the number and length of messages to receive for ij-structure */
4921 /*-------------------------------------------------------------------------*/
4922 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4923 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4924
4925 /* post the Irecv of j-structure */
4926 /*-------------------------------*/
4927 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4928 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4929
4930 /* post the Isend of j-structure */
4931 /*--------------------------------*/
4932 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4933
4934 for (proc=0, k=0; proc<size; proc++) {
4935 if (!len_s[proc]) continue;
4936 i = owners[proc];
4937 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4938 k++;
4939 }
4940
4941 /* receives and sends of j-structure are complete */
4942 /*------------------------------------------------*/
4943 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4944 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4945
4946 /* send and recv i-structure */
4947 /*---------------------------*/
4948 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4949 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4950
4951 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4952 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4953 for (proc=0,k=0; proc<size; proc++) {
4954 if (!len_s[proc]) continue;
4955 /* form outgoing message for i-structure:
4956 buf_si[0]: nrows to be sent
4957 [1:nrows]: row index (global)
4958 [nrows+1:2*nrows+1]: i-structure index
4959 */
4960 /*-------------------------------------------*/
4961 nrows = len_si[proc]/2 - 1;
4962 buf_si_i = buf_si + nrows+1;
4963 buf_si[0] = nrows;
4964 buf_si_i[0] = 0;
4965 nrows = 0;
4966 for (i=owners[proc]; i<owners[proc+1]; i++) {
4967 anzi = ai[i+1] - ai[i];
4968 if (anzi) {
4969 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4970 buf_si[nrows+1] = i-owners[proc]; /* local row index */
4971 nrows++;
4972 }
4973 }
4974 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4975 k++;
4976 buf_si += len_si[proc];
4977 }
4978
4979 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4980 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4981
4982 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4983 for (i=0; i<merge->nrecv; i++) {
4984 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4985 }
4986
4987 ierr = PetscFree(len_si);CHKERRQ(ierr);
4988 ierr = PetscFree(len_ri);CHKERRQ(ierr);
4989 ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4990 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4991 ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4992 ierr = PetscFree(buf_s);CHKERRQ(ierr);
4993 ierr = PetscFree(status);CHKERRQ(ierr);
4994
4995 /* compute a local seq matrix in each processor */
4996 /*----------------------------------------------*/
4997 /* allocate bi array and free space for accumulating nonzero column info */
4998 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4999 bi[0] = 0;
5000
5001 /* create and initialize a linked list */
5002 nlnk = N+1;
5003 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5004
5005 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5006 len = ai[owners[rank+1]] - ai[owners[rank]];
5007 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
5008
5009 current_space = free_space;
5010
5011 /* determine symbolic info for each local row */
5012 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
5013
5014 for (k=0; k<merge->nrecv; k++) {
5015 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5016 nrows = *buf_ri_k[k];
5017 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */
5018 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */
5019 }
5020
5021 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5022 len = 0;
5023 for (i=0; i<m; i++) {
5024 bnzi = 0;
5025 /* add local non-zero cols of this proc's seqmat into lnk */
5026 arow = owners[rank] + i;
5027 anzi = ai[arow+1] - ai[arow];
5028 aj = a->j + ai[arow];
5029 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5030 bnzi += nlnk;
5031 /* add received col data into lnk */
5032 for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5033 if (i == *nextrow[k]) { /* i-th row */
5034 anzi = *(nextai[k]+1) - *nextai[k];
5035 aj = buf_rj[k] + *nextai[k];
5036 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5037 bnzi += nlnk;
5038 nextrow[k]++; nextai[k]++;
5039 }
5040 }
5041 if (len < bnzi) len = bnzi; /* =max(bnzi) */
5042
5043 /* if free space is not available, make more free space */
5044 if (current_space->local_remaining<bnzi) {
5045 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr);
5046 nspacedouble++;
5047 }
5048 /* copy data into free space, then initialize lnk */
5049 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5050 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5051
5052 current_space->array += bnzi;
5053 current_space->local_used += bnzi;
5054 current_space->local_remaining -= bnzi;
5055
5056 bi[i+1] = bi[i] + bnzi;
5057 }
5058
5059 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5060
5061 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
5062 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5063 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5064
5065 /* create symbolic parallel matrix B_mpi */
5066 /*---------------------------------------*/
5067 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5068 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5069 if (n==PETSC_DECIDE) {
5070 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5071 } else {
5072 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5073 }
5074 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5075 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5076 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5077 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5078 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5079
5080 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5081 B_mpi->assembled = PETSC_FALSE;
5082 merge->bi = bi;
5083 merge->bj = bj;
5084 merge->buf_ri = buf_ri;
5085 merge->buf_rj = buf_rj;
5086 merge->coi = NULL;
5087 merge->coj = NULL;
5088 merge->owners_co = NULL;
5089
5090 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5091
5092 /* attach the supporting struct to B_mpi for reuse */
5093 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5094 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5095 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
5096 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5097 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
5098 *mpimat = B_mpi;
5099
5100 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5101 PetscFunctionReturn(0);
5102 }
5103
5104 /*@C
5105 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5106 matrices from each processor
5107
5108 Collective
5109
5110 Input Parameters:
5111 + comm - the communicators the parallel matrix will live on
5112 . seqmat - the input sequential matrices
5113 . m - number of local rows (or PETSC_DECIDE)
5114 . n - number of local columns (or PETSC_DECIDE)
5115 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5116
5117 Output Parameter:
5118 . mpimat - the parallel matrix generated
5119
5120 Level: advanced
5121
5122 Notes:
5123 The dimensions of the sequential matrix in each processor MUST be the same.
5124 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5125 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5126 @*/
MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat * mpimat)5127 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5128 {
5129 PetscErrorCode ierr;
5130 PetscMPIInt size;
5131
5132 PetscFunctionBegin;
5133 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5134 if (size == 1) {
5135 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5136 if (scall == MAT_INITIAL_MATRIX) {
5137 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5138 } else {
5139 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5140 }
5141 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5142 PetscFunctionReturn(0);
5143 }
5144 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5145 if (scall == MAT_INITIAL_MATRIX) {
5146 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5147 }
5148 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5149 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5150 PetscFunctionReturn(0);
5151 }
5152
5153 /*@
5154 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5155 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5156 with MatGetSize()
5157
5158 Not Collective
5159
5160 Input Parameters:
5161 + A - the matrix
5162 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5163
5164 Output Parameter:
5165 . A_loc - the local sequential matrix generated
5166
5167 Level: developer
5168
5169 Notes:
5170 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5171 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5172 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5173 modify the values of the returned A_loc.
5174
5175 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
5176
5177 @*/
MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat * A_loc)5178 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5179 {
5180 PetscErrorCode ierr;
5181 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
5182 Mat_SeqAIJ *mat,*a,*b;
5183 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5184 MatScalar *aa,*ba,*cam;
5185 PetscScalar *ca;
5186 PetscMPIInt size;
5187 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5188 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
5189 PetscBool match;
5190
5191 PetscFunctionBegin;
5192 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5193 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5194 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
5195 if (size == 1) {
5196 if (scall == MAT_INITIAL_MATRIX) {
5197 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5198 *A_loc = mpimat->A;
5199 } else if (scall == MAT_REUSE_MATRIX) {
5200 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5201 }
5202 PetscFunctionReturn(0);
5203 }
5204
5205 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5206 a = (Mat_SeqAIJ*)(mpimat->A)->data;
5207 b = (Mat_SeqAIJ*)(mpimat->B)->data;
5208 ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5209 aa = a->a; ba = b->a;
5210 if (scall == MAT_INITIAL_MATRIX) {
5211 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5212 ci[0] = 0;
5213 for (i=0; i<am; i++) {
5214 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5215 }
5216 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5217 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5218 k = 0;
5219 for (i=0; i<am; i++) {
5220 ncols_o = bi[i+1] - bi[i];
5221 ncols_d = ai[i+1] - ai[i];
5222 /* off-diagonal portion of A */
5223 for (jo=0; jo<ncols_o; jo++) {
5224 col = cmap[*bj];
5225 if (col >= cstart) break;
5226 cj[k] = col; bj++;
5227 ca[k++] = *ba++;
5228 }
5229 /* diagonal portion of A */
5230 for (j=0; j<ncols_d; j++) {
5231 cj[k] = cstart + *aj++;
5232 ca[k++] = *aa++;
5233 }
5234 /* off-diagonal portion of A */
5235 for (j=jo; j<ncols_o; j++) {
5236 cj[k] = cmap[*bj++];
5237 ca[k++] = *ba++;
5238 }
5239 }
5240 /* put together the new matrix */
5241 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5242 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5243 /* Since these are PETSc arrays, change flags to free them as necessary. */
5244 mat = (Mat_SeqAIJ*)(*A_loc)->data;
5245 mat->free_a = PETSC_TRUE;
5246 mat->free_ij = PETSC_TRUE;
5247 mat->nonew = 0;
5248 } else if (scall == MAT_REUSE_MATRIX) {
5249 mat=(Mat_SeqAIJ*)(*A_loc)->data;
5250 ci = mat->i; cj = mat->j; cam = mat->a;
5251 for (i=0; i<am; i++) {
5252 /* off-diagonal portion of A */
5253 ncols_o = bi[i+1] - bi[i];
5254 for (jo=0; jo<ncols_o; jo++) {
5255 col = cmap[*bj];
5256 if (col >= cstart) break;
5257 *cam++ = *ba++; bj++;
5258 }
5259 /* diagonal portion of A */
5260 ncols_d = ai[i+1] - ai[i];
5261 for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5262 /* off-diagonal portion of A */
5263 for (j=jo; j<ncols_o; j++) {
5264 *cam++ = *ba++; bj++;
5265 }
5266 }
5267 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5268 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5269 PetscFunctionReturn(0);
5270 }
5271
5272 /*@C
5273 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5274
5275 Not Collective
5276
5277 Input Parameters:
5278 + A - the matrix
5279 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5280 - row, col - index sets of rows and columns to extract (or NULL)
5281
5282 Output Parameter:
5283 . A_loc - the local sequential matrix generated
5284
5285 Level: developer
5286
5287 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5288
5289 @*/
MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS * row,IS * col,Mat * A_loc)5290 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5291 {
5292 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5293 PetscErrorCode ierr;
5294 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5295 IS isrowa,iscola;
5296 Mat *aloc;
5297 PetscBool match;
5298
5299 PetscFunctionBegin;
5300 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5301 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5302 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5303 if (!row) {
5304 start = A->rmap->rstart; end = A->rmap->rend;
5305 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5306 } else {
5307 isrowa = *row;
5308 }
5309 if (!col) {
5310 start = A->cmap->rstart;
5311 cmap = a->garray;
5312 nzA = a->A->cmap->n;
5313 nzB = a->B->cmap->n;
5314 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5315 ncols = 0;
5316 for (i=0; i<nzB; i++) {
5317 if (cmap[i] < start) idx[ncols++] = cmap[i];
5318 else break;
5319 }
5320 imark = i;
5321 for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5322 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5323 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5324 } else {
5325 iscola = *col;
5326 }
5327 if (scall != MAT_INITIAL_MATRIX) {
5328 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5329 aloc[0] = *A_loc;
5330 }
5331 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5332 if (!col) { /* attach global id of condensed columns */
5333 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5334 }
5335 *A_loc = aloc[0];
5336 ierr = PetscFree(aloc);CHKERRQ(ierr);
5337 if (!row) {
5338 ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5339 }
5340 if (!col) {
5341 ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5342 }
5343 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5344 PetscFunctionReturn(0);
5345 }
5346
5347 /*
5348 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5349 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5350 * on a global size.
5351 * */
MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat * P_oth)5352 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5353 {
5354 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data;
5355 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5356 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5357 PetscMPIInt owner;
5358 PetscSFNode *iremote,*oiremote;
5359 const PetscInt *lrowindices;
5360 PetscErrorCode ierr;
5361 PetscSF sf,osf;
5362 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j;
5363 PetscInt ontotalcols,dntotalcols,ntotalcols,nout;
5364 MPI_Comm comm;
5365 ISLocalToGlobalMapping mapping;
5366
5367 PetscFunctionBegin;
5368 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5369 /* plocalsize is the number of roots
5370 * nrows is the number of leaves
5371 * */
5372 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5373 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5374 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5375 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5376 for (i=0;i<nrows;i++) {
5377 /* Find a remote index and an owner for a row
5378 * The row could be local or remote
5379 * */
5380 owner = 0;
5381 lidx = 0;
5382 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5383 iremote[i].index = lidx;
5384 iremote[i].rank = owner;
5385 }
5386 /* Create SF to communicate how many nonzero columns for each row */
5387 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5388 /* SF will figure out the number of nonzero colunms for each row, and their
5389 * offsets
5390 * */
5391 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5392 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5393 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5394
5395 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5396 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5397 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5398 roffsets[0] = 0;
5399 roffsets[1] = 0;
5400 for (i=0;i<plocalsize;i++) {
5401 /* diag */
5402 nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5403 /* off diag */
5404 nrcols[i*2+1] = po->i[i+1] - po->i[i];
5405 /* compute offsets so that we relative location for each row */
5406 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5407 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5408 }
5409 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5410 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5411 /* 'r' means root, and 'l' means leaf */
5412 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5413 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5414 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5415 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5416 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5417 ierr = PetscFree(roffsets);CHKERRQ(ierr);
5418 ierr = PetscFree(nrcols);CHKERRQ(ierr);
5419 dntotalcols = 0;
5420 ontotalcols = 0;
5421 ncol = 0;
5422 for (i=0;i<nrows;i++) {
5423 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5424 ncol = PetscMax(pnnz[i],ncol);
5425 /* diag */
5426 dntotalcols += nlcols[i*2+0];
5427 /* off diag */
5428 ontotalcols += nlcols[i*2+1];
5429 }
5430 /* We do not need to figure the right number of columns
5431 * since all the calculations will be done by going through the raw data
5432 * */
5433 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5434 ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5435 ierr = PetscFree(pnnz);CHKERRQ(ierr);
5436 p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5437 /* diag */
5438 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5439 /* off diag */
5440 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5441 /* diag */
5442 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5443 /* off diag */
5444 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5445 dntotalcols = 0;
5446 ontotalcols = 0;
5447 ntotalcols = 0;
5448 for (i=0;i<nrows;i++) {
5449 owner = 0;
5450 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5451 /* Set iremote for diag matrix */
5452 for (j=0;j<nlcols[i*2+0];j++) {
5453 iremote[dntotalcols].index = loffsets[i*2+0] + j;
5454 iremote[dntotalcols].rank = owner;
5455 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
5456 ilocal[dntotalcols++] = ntotalcols++;
5457 }
5458 /* off diag */
5459 for (j=0;j<nlcols[i*2+1];j++) {
5460 oiremote[ontotalcols].index = loffsets[i*2+1] + j;
5461 oiremote[ontotalcols].rank = owner;
5462 oilocal[ontotalcols++] = ntotalcols++;
5463 }
5464 }
5465 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5466 ierr = PetscFree(loffsets);CHKERRQ(ierr);
5467 ierr = PetscFree(nlcols);CHKERRQ(ierr);
5468 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5469 /* P serves as roots and P_oth is leaves
5470 * Diag matrix
5471 * */
5472 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5473 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5474 ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5475
5476 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5477 /* Off diag */
5478 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5479 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5480 ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5481 /* We operate on the matrix internal data for saving memory */
5482 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5483 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5484 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5485 /* Convert to global indices for diag matrix */
5486 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5487 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5488 /* We want P_oth store global indices */
5489 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5490 /* Use memory scalable approach */
5491 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5492 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5493 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5494 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5495 /* Convert back to local indices */
5496 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5497 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5498 nout = 0;
5499 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5500 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5501 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5502 /* Exchange values */
5503 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5504 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5505 /* Stop PETSc from shrinking memory */
5506 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5507 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5508 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5509 /* Attach PetscSF objects to P_oth so that we can reuse it later */
5510 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5511 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5512 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5513 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5514 PetscFunctionReturn(0);
5515 }
5516
5517 /*
5518 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5519 * This supports MPIAIJ and MAIJ
5520 * */
MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat * P_oth)5521 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5522 {
5523 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5524 Mat_SeqAIJ *p_oth;
5525 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5526 IS rows,map;
5527 PetscHMapI hamp;
5528 PetscInt i,htsize,*rowindices,off,*mapping,key,count;
5529 MPI_Comm comm;
5530 PetscSF sf,osf;
5531 PetscBool has;
5532 PetscErrorCode ierr;
5533
5534 PetscFunctionBegin;
5535 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5536 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5537 /* If it is the first time, create an index set of off-diag nonzero columns of A,
5538 * and then create a submatrix (that often is an overlapping matrix)
5539 * */
5540 if (reuse == MAT_INITIAL_MATRIX) {
5541 /* Use a hash table to figure out unique keys */
5542 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5543 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5544 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5545 count = 0;
5546 /* Assume that a->g is sorted, otherwise the following does not make sense */
5547 for (i=0;i<a->B->cmap->n;i++) {
5548 key = a->garray[i]/dof;
5549 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5550 if (!has) {
5551 mapping[i] = count;
5552 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5553 } else {
5554 /* Current 'i' has the same value the previous step */
5555 mapping[i] = count-1;
5556 }
5557 }
5558 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5559 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5560 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5561 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5562 off = 0;
5563 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5564 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5565 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5566 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5567 /* In case, the matrix was already created but users want to recreate the matrix */
5568 ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5569 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5570 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5571 ierr = ISDestroy(&map);CHKERRQ(ierr);
5572 ierr = ISDestroy(&rows);CHKERRQ(ierr);
5573 } else if (reuse == MAT_REUSE_MATRIX) {
5574 /* If matrix was already created, we simply update values using SF objects
5575 * that as attached to the matrix ealier.
5576 * */
5577 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5578 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5579 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5580 p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5581 /* Update values in place */
5582 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5583 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5584 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5585 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5586 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5587 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5588 PetscFunctionReturn(0);
5589 }
5590
5591 /*@C
5592 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5593
5594 Collective on Mat
5595
5596 Input Parameters:
5597 + A,B - the matrices in mpiaij format
5598 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5599 - rowb, colb - index sets of rows and columns of B to extract (or NULL)
5600
5601 Output Parameter:
5602 + rowb, colb - index sets of rows and columns of B to extract
5603 - B_seq - the sequential matrix generated
5604
5605 Level: developer
5606
5607 @*/
MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS * rowb,IS * colb,Mat * B_seq)5608 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5609 {
5610 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5611 PetscErrorCode ierr;
5612 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5613 IS isrowb,iscolb;
5614 Mat *bseq=NULL;
5615
5616 PetscFunctionBegin;
5617 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5618 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5619 }
5620 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5621
5622 if (scall == MAT_INITIAL_MATRIX) {
5623 start = A->cmap->rstart;
5624 cmap = a->garray;
5625 nzA = a->A->cmap->n;
5626 nzB = a->B->cmap->n;
5627 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5628 ncols = 0;
5629 for (i=0; i<nzB; i++) { /* row < local row index */
5630 if (cmap[i] < start) idx[ncols++] = cmap[i];
5631 else break;
5632 }
5633 imark = i;
5634 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
5635 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5636 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5637 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5638 } else {
5639 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5640 isrowb = *rowb; iscolb = *colb;
5641 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5642 bseq[0] = *B_seq;
5643 }
5644 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5645 *B_seq = bseq[0];
5646 ierr = PetscFree(bseq);CHKERRQ(ierr);
5647 if (!rowb) {
5648 ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5649 } else {
5650 *rowb = isrowb;
5651 }
5652 if (!colb) {
5653 ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5654 } else {
5655 *colb = iscolb;
5656 }
5657 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5658 PetscFunctionReturn(0);
5659 }
5660
5661 /*
5662 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5663 of the OFF-DIAGONAL portion of local A
5664
5665 Collective on Mat
5666
5667 Input Parameters:
5668 + A,B - the matrices in mpiaij format
5669 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5670
5671 Output Parameter:
5672 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5673 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5674 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5675 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5676
5677 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5678 for this matrix. This is not desirable..
5679
5680 Level: developer
5681
5682 */
MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt ** startsj_s,PetscInt ** startsj_r,MatScalar ** bufa_ptr,Mat * B_oth)5683 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5684 {
5685 PetscErrorCode ierr;
5686 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
5687 Mat_SeqAIJ *b_oth;
5688 VecScatter ctx;
5689 MPI_Comm comm;
5690 const PetscMPIInt *rprocs,*sprocs;
5691 const PetscInt *srow,*rstarts,*sstarts;
5692 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5693 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5694 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL;
5695 MPI_Request *rwaits = NULL,*swaits = NULL;
5696 MPI_Status rstatus;
5697 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi;
5698
5699 PetscFunctionBegin;
5700 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5701 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5702
5703 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5704 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5705 }
5706 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5707 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5708
5709 if (size == 1) {
5710 startsj_s = NULL;
5711 bufa_ptr = NULL;
5712 *B_oth = NULL;
5713 PetscFunctionReturn(0);
5714 }
5715
5716 ctx = a->Mvctx;
5717 tag = ((PetscObject)ctx)->tag;
5718
5719 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
5720 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5721 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5722 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5723 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5724 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5725 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5726
5727 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5728 if (scall == MAT_INITIAL_MATRIX) {
5729 /* i-array */
5730 /*---------*/
5731 /* post receives */
5732 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5733 for (i=0; i<nrecvs; i++) {
5734 rowlen = rvalues + rstarts[i]*rbs;
5735 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5736 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5737 }
5738
5739 /* pack the outgoing message */
5740 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5741
5742 sstartsj[0] = 0;
5743 rstartsj[0] = 0;
5744 len = 0; /* total length of j or a array to be sent */
5745 if (nsends) {
5746 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5747 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5748 }
5749 for (i=0; i<nsends; i++) {
5750 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5751 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5752 for (j=0; j<nrows; j++) {
5753 row = srow[k] + B->rmap->range[rank]; /* global row idx */
5754 for (l=0; l<sbs; l++) {
5755 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5756
5757 rowlen[j*sbs+l] = ncols;
5758
5759 len += ncols;
5760 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5761 }
5762 k++;
5763 }
5764 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5765
5766 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5767 }
5768 /* recvs and sends of i-array are completed */
5769 i = nrecvs;
5770 while (i--) {
5771 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5772 }
5773 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5774 ierr = PetscFree(svalues);CHKERRQ(ierr);
5775
5776 /* allocate buffers for sending j and a arrays */
5777 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5778 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5779
5780 /* create i-array of B_oth */
5781 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5782
5783 b_othi[0] = 0;
5784 len = 0; /* total length of j or a array to be received */
5785 k = 0;
5786 for (i=0; i<nrecvs; i++) {
5787 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5788 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5789 for (j=0; j<nrows; j++) {
5790 b_othi[k+1] = b_othi[k] + rowlen[j];
5791 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5792 k++;
5793 }
5794 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5795 }
5796 ierr = PetscFree(rvalues);CHKERRQ(ierr);
5797
5798 /* allocate space for j and a arrrays of B_oth */
5799 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5800 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5801
5802 /* j-array */
5803 /*---------*/
5804 /* post receives of j-array */
5805 for (i=0; i<nrecvs; i++) {
5806 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5807 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5808 }
5809
5810 /* pack the outgoing message j-array */
5811 if (nsends) k = sstarts[0];
5812 for (i=0; i<nsends; i++) {
5813 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5814 bufJ = bufj+sstartsj[i];
5815 for (j=0; j<nrows; j++) {
5816 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5817 for (ll=0; ll<sbs; ll++) {
5818 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5819 for (l=0; l<ncols; l++) {
5820 *bufJ++ = cols[l];
5821 }
5822 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5823 }
5824 }
5825 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5826 }
5827
5828 /* recvs and sends of j-array are completed */
5829 i = nrecvs;
5830 while (i--) {
5831 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5832 }
5833 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5834 } else if (scall == MAT_REUSE_MATRIX) {
5835 sstartsj = *startsj_s;
5836 rstartsj = *startsj_r;
5837 bufa = *bufa_ptr;
5838 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5839 b_otha = b_oth->a;
5840 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5841
5842 /* a-array */
5843 /*---------*/
5844 /* post receives of a-array */
5845 for (i=0; i<nrecvs; i++) {
5846 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5847 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5848 }
5849
5850 /* pack the outgoing message a-array */
5851 if (nsends) k = sstarts[0];
5852 for (i=0; i<nsends; i++) {
5853 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5854 bufA = bufa+sstartsj[i];
5855 for (j=0; j<nrows; j++) {
5856 row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5857 for (ll=0; ll<sbs; ll++) {
5858 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5859 for (l=0; l<ncols; l++) {
5860 *bufA++ = vals[l];
5861 }
5862 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5863 }
5864 }
5865 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5866 }
5867 /* recvs and sends of a-array are completed */
5868 i = nrecvs;
5869 while (i--) {
5870 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5871 }
5872 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
5873 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5874
5875 if (scall == MAT_INITIAL_MATRIX) {
5876 /* put together the new matrix */
5877 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5878
5879 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5880 /* Since these are PETSc arrays, change flags to free them as necessary. */
5881 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
5882 b_oth->free_a = PETSC_TRUE;
5883 b_oth->free_ij = PETSC_TRUE;
5884 b_oth->nonew = 0;
5885
5886 ierr = PetscFree(bufj);CHKERRQ(ierr);
5887 if (!startsj_s || !bufa_ptr) {
5888 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5889 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5890 } else {
5891 *startsj_s = sstartsj;
5892 *startsj_r = rstartsj;
5893 *bufa_ptr = bufa;
5894 }
5895 }
5896
5897 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5898 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5899 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5900 PetscFunctionReturn(0);
5901 }
5902
5903 /*@C
5904 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5905
5906 Not Collective
5907
5908 Input Parameters:
5909 . A - The matrix in mpiaij format
5910
5911 Output Parameter:
5912 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5913 . colmap - A map from global column index to local index into lvec
5914 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5915
5916 Level: developer
5917
5918 @*/
5919 #if defined(PETSC_USE_CTABLE)
MatGetCommunicationStructs(Mat A,Vec * lvec,PetscTable * colmap,VecScatter * multScatter)5920 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5921 #else
5922 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5923 #endif
5924 {
5925 Mat_MPIAIJ *a;
5926
5927 PetscFunctionBegin;
5928 PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5929 PetscValidPointer(lvec, 2);
5930 PetscValidPointer(colmap, 3);
5931 PetscValidPointer(multScatter, 4);
5932 a = (Mat_MPIAIJ*) A->data;
5933 if (lvec) *lvec = a->lvec;
5934 if (colmap) *colmap = a->colmap;
5935 if (multScatter) *multScatter = a->Mvctx;
5936 PetscFunctionReturn(0);
5937 }
5938
5939 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5940 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5941 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5942 #if defined(PETSC_HAVE_MKL_SPARSE)
5943 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5944 #endif
5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5946 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5947 #if defined(PETSC_HAVE_ELEMENTAL)
5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5949 #endif
5950 #if defined(PETSC_HAVE_SCALAPACK)
5951 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5952 #endif
5953 #if defined(PETSC_HAVE_HYPRE)
5954 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5955 #endif
5956 #if defined(PETSC_HAVE_CUDA)
5957 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5958 #endif
5959 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5960 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5961 #endif
5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5963 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5964 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5965
5966 /*
5967 Computes (B'*A')' since computing B*A directly is untenable
5968
5969 n p p
5970 [ ] [ ] [ ]
5971 m [ A ] * n [ B ] = m [ C ]
5972 [ ] [ ] [ ]
5973
5974 */
MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)5975 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5976 {
5977 PetscErrorCode ierr;
5978 Mat At,Bt,Ct;
5979
5980 PetscFunctionBegin;
5981 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5982 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5983 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5984 ierr = MatDestroy(&At);CHKERRQ(ierr);
5985 ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5986 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5987 ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5988 PetscFunctionReturn(0);
5989 }
5990
MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)5991 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5992 {
5993 PetscErrorCode ierr;
5994 PetscBool cisdense;
5995
5996 PetscFunctionBegin;
5997 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5998 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5999 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
6000 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
6001 if (!cisdense) {
6002 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6003 }
6004 ierr = MatSetUp(C);CHKERRQ(ierr);
6005
6006 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6007 PetscFunctionReturn(0);
6008 }
6009
6010 /* ----------------------------------------------------------------*/
MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)6011 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6012 {
6013 Mat_Product *product = C->product;
6014 Mat A = product->A,B=product->B;
6015
6016 PetscFunctionBegin;
6017 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6018 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6019
6020 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6021 C->ops->productsymbolic = MatProductSymbolic_AB;
6022 PetscFunctionReturn(0);
6023 }
6024
MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)6025 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6026 {
6027 PetscErrorCode ierr;
6028 Mat_Product *product = C->product;
6029
6030 PetscFunctionBegin;
6031 if (product->type == MATPRODUCT_AB) {
6032 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6033 }
6034 PetscFunctionReturn(0);
6035 }
6036 /* ----------------------------------------------------------------*/
6037
6038 /*MC
6039 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6040
6041 Options Database Keys:
6042 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6043
6044 Level: beginner
6045
6046 Notes:
6047 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
6048 in this case the values associated with the rows and columns one passes in are set to zero
6049 in the matrix
6050
6051 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
6052 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6053
6054 .seealso: MatCreateAIJ()
6055 M*/
6056
MatCreate_MPIAIJ(Mat B)6057 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6058 {
6059 Mat_MPIAIJ *b;
6060 PetscErrorCode ierr;
6061 PetscMPIInt size;
6062
6063 PetscFunctionBegin;
6064 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
6065
6066 ierr = PetscNewLog(B,&b);CHKERRQ(ierr);
6067 B->data = (void*)b;
6068 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6069 B->assembled = PETSC_FALSE;
6070 B->insertmode = NOT_SET_VALUES;
6071 b->size = size;
6072
6073 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
6074
6075 /* build cache for off array entries formed */
6076 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6077
6078 b->donotstash = PETSC_FALSE;
6079 b->colmap = NULL;
6080 b->garray = NULL;
6081 b->roworiented = PETSC_TRUE;
6082
6083 /* stuff used for matrix vector multiply */
6084 b->lvec = NULL;
6085 b->Mvctx = NULL;
6086
6087 /* stuff for MatGetRow() */
6088 b->rowindices = NULL;
6089 b->rowvalues = NULL;
6090 b->getrowactive = PETSC_FALSE;
6091
6092 /* flexible pointer used in CUSP/CUSPARSE classes */
6093 b->spptr = NULL;
6094
6095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6105 #if defined(PETSC_HAVE_CUDA)
6106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6107 #endif
6108 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6110 #endif
6111 #if defined(PETSC_HAVE_MKL_SPARSE)
6112 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6113 #endif
6114 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6116 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6117 #if defined(PETSC_HAVE_ELEMENTAL)
6118 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6119 #endif
6120 #if defined(PETSC_HAVE_SCALAPACK)
6121 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6122 #endif
6123 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6124 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6125 #if defined(PETSC_HAVE_HYPRE)
6126 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6127 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6128 #endif
6129 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6130 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6131 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6132 PetscFunctionReturn(0);
6133 }
6134
6135 /*@C
6136 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6137 and "off-diagonal" part of the matrix in CSR format.
6138
6139 Collective
6140
6141 Input Parameters:
6142 + comm - MPI communicator
6143 . m - number of local rows (Cannot be PETSC_DECIDE)
6144 . n - This value should be the same as the local size used in creating the
6145 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6146 calculated if N is given) For square matrices n is almost always m.
6147 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6148 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6149 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6150 . j - column indices
6151 . a - matrix values
6152 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6153 . oj - column indices
6154 - oa - matrix values
6155
6156 Output Parameter:
6157 . mat - the matrix
6158
6159 Level: advanced
6160
6161 Notes:
6162 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6163 must free the arrays once the matrix has been destroyed and not before.
6164
6165 The i and j indices are 0 based
6166
6167 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6168
6169 This sets local rows and cannot be used to set off-processor values.
6170
6171 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6172 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6173 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6174 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6175 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6176 communication if it is known that only local entries will be set.
6177
6178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6179 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6180 @*/
MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat * mat)6181 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6182 {
6183 PetscErrorCode ierr;
6184 Mat_MPIAIJ *maij;
6185
6186 PetscFunctionBegin;
6187 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6188 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6189 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6190 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6191 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6192 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6193 maij = (Mat_MPIAIJ*) (*mat)->data;
6194
6195 (*mat)->preallocated = PETSC_TRUE;
6196
6197 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6198 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6199
6200 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6201 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6202
6203 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6204 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6205 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6206 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6207
6208 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6209 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6210 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6211 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6212 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6213 PetscFunctionReturn(0);
6214 }
6215
6216 /*
6217 Special version for direct calls from Fortran
6218 */
6219 #include <petsc/private/fortranimpl.h>
6220
6221 /* Change these macros so can be used in void function */
6222 #undef CHKERRQ
6223 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6224 #undef SETERRQ2
6225 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6226 #undef SETERRQ3
6227 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6228 #undef SETERRQ
6229 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6230
6231 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6232 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6233 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6234 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6235 #else
6236 #endif
matsetvaluesmpiaij_(Mat * mmat,PetscInt * mm,const PetscInt im[],PetscInt * mn,const PetscInt in[],const PetscScalar v[],InsertMode * maddv,PetscErrorCode * _ierr)6237 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6238 {
6239 Mat mat = *mmat;
6240 PetscInt m = *mm, n = *mn;
6241 InsertMode addv = *maddv;
6242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
6243 PetscScalar value;
6244 PetscErrorCode ierr;
6245
6246 MatCheckPreallocated(mat,1);
6247 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6248 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6249 {
6250 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
6251 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6252 PetscBool roworiented = aij->roworiented;
6253
6254 /* Some Variables required in the macro */
6255 Mat A = aij->A;
6256 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
6257 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6258 MatScalar *aa = a->a;
6259 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6260 Mat B = aij->B;
6261 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
6262 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6263 MatScalar *ba = b->a;
6264 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6265 * cannot use "#if defined" inside a macro. */
6266 PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6267
6268 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6269 PetscInt nonew = a->nonew;
6270 MatScalar *ap1,*ap2;
6271
6272 PetscFunctionBegin;
6273 for (i=0; i<m; i++) {
6274 if (im[i] < 0) continue;
6275 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6276 if (im[i] >= rstart && im[i] < rend) {
6277 row = im[i] - rstart;
6278 lastcol1 = -1;
6279 rp1 = aj + ai[row];
6280 ap1 = aa + ai[row];
6281 rmax1 = aimax[row];
6282 nrow1 = ailen[row];
6283 low1 = 0;
6284 high1 = nrow1;
6285 lastcol2 = -1;
6286 rp2 = bj + bi[row];
6287 ap2 = ba + bi[row];
6288 rmax2 = bimax[row];
6289 nrow2 = bilen[row];
6290 low2 = 0;
6291 high2 = nrow2;
6292
6293 for (j=0; j<n; j++) {
6294 if (roworiented) value = v[i*n+j];
6295 else value = v[i+j*m];
6296 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6297 if (in[j] >= cstart && in[j] < cend) {
6298 col = in[j] - cstart;
6299 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6300 #if defined(PETSC_HAVE_DEVICE)
6301 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6302 #endif
6303 } else if (in[j] < 0) continue;
6304 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6305 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6306 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6307 } else {
6308 if (mat->was_assembled) {
6309 if (!aij->colmap) {
6310 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6311 }
6312 #if defined(PETSC_USE_CTABLE)
6313 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6314 col--;
6315 #else
6316 col = aij->colmap[in[j]] - 1;
6317 #endif
6318 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6319 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6320 col = in[j];
6321 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6322 B = aij->B;
6323 b = (Mat_SeqAIJ*)B->data;
6324 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6325 rp2 = bj + bi[row];
6326 ap2 = ba + bi[row];
6327 rmax2 = bimax[row];
6328 nrow2 = bilen[row];
6329 low2 = 0;
6330 high2 = nrow2;
6331 bm = aij->B->rmap->n;
6332 ba = b->a;
6333 inserted = PETSC_FALSE;
6334 }
6335 } else col = in[j];
6336 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6337 #if defined(PETSC_HAVE_DEVICE)
6338 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6339 #endif
6340 }
6341 }
6342 } else if (!aij->donotstash) {
6343 if (roworiented) {
6344 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6345 } else {
6346 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6347 }
6348 }
6349 }
6350 }
6351 PetscFunctionReturnVoid();
6352 }
6353