1
2 /*
3 Basic functions for basic parallel dense matrices.
4 */
5
6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
7 #include <../src/mat/impls/aij/mpi/mpiaij.h>
8 #include <petscblaslapack.h>
9
10 /*@
11
12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13 matrix that represents the operator. For sequential matrices it returns itself.
14
15 Input Parameter:
16 . A - the Seq or MPI dense matrix
17
18 Output Parameter:
19 . B - the inner matrix
20
21 Level: intermediate
22
23 @*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  /* MPI dense: return the inner sequential block; note that the cast of
     A->data above is only dereferenced in this branch, where it is valid */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else {
    /* sequential dense: the matrix is its own local matrix */
    ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
42
/* Access one locally-owned row of the parallel dense matrix by forwarding to
   the sequential inner matrix; pair with MatRestoreRow_MPIDense(). */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  /* only rows owned by this process are supported */
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart; /* global -> local row index */
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
55
/* Release a row obtained with MatGetRow_MPIDense(); same locality restriction. */
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart; /* global -> local row index */
  ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
68
/* Return the square on-diagonal block of the parallel dense matrix.
   The block shares storage with the inner matrix and is cached on A
   (composed as "DiagonalBlock") so repeated calls are cheap. */
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    /* B aliases A's local storage, offset to the diagonal columns.
       NOTE(review): offset m*rstart assumes contiguous local storage
       (lda == m) -- confirm this holds for all callers */
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /* the compose takes a reference owned by A; drop the local reference so
       the block is destroyed together with A */
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}
102
/* Insert/add a logically dense block of values.  Locally-owned rows go
   straight into the inner sequential matrix; off-process entries are stashed
   for communication during assembly (unless donotstash is set). */
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        /* v is row-major: row i occupies v[i*n .. i*n+n-1] */
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        /* v is column-major: entry (i,j) sits at v[i + j*m] */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      /* off-process entry: mark unassembled and queue for assembly-time send */
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
136
/* Fetch values at the requested (row,column) positions into v (row-major).
   Only locally-owned rows can be queried; remote rows raise an error. */
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        /* output is row-major: (i,j) stored at v[i*n + j] */
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
158
MatDenseGetLDA_MPIDense(Mat A,PetscInt * lda)159 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
160 {
161 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
162 PetscErrorCode ierr;
163
164 PetscFunctionBegin;
165 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
166 PetscFunctionReturn(0);
167 }
168
/* Set the leading dimension of the local block.  If the inner matrix does not
   exist yet it is created here (lazily), matching the CUDA/host flavor of A,
   so the LDA can be recorded before preallocation. */
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    /* layouts must be final before sizing the inner matrix */
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    /* local block is (local rows) x (global columns) */
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
189
MatDenseGetArray_MPIDense(Mat A,PetscScalar ** array)190 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
191 {
192 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
193 PetscErrorCode ierr;
194
195 PetscFunctionBegin;
196 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
197 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
198 PetscFunctionReturn(0);
199 }
200
MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar ** array)201 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
202 {
203 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
204 PetscErrorCode ierr;
205
206 PetscFunctionBegin;
207 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
209 PetscFunctionReturn(0);
210 }
211
MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar ** array)212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
213 {
214 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
215 PetscErrorCode ierr;
216
217 PetscFunctionBegin;
218 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
220 PetscFunctionReturn(0);
221 }
222
MatDensePlaceArray_MPIDense(Mat A,const PetscScalar * array)223 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
224 {
225 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
226 PetscErrorCode ierr;
227
228 PetscFunctionBegin;
229 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
230 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
231 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
232 PetscFunctionReturn(0);
233 }
234
MatDenseResetArray_MPIDense(Mat A)235 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
236 {
237 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
238 PetscErrorCode ierr;
239
240 PetscFunctionBegin;
241 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
242 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
243 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
244 PetscFunctionReturn(0);
245 }
246
MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar * array)247 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
248 {
249 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
250 PetscErrorCode ierr;
251
252 PetscFunctionBegin;
253 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
254 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
255 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
256 PetscFunctionReturn(0);
257 }
258
/* Extract the submatrix A(isrow,iscol) into a new (or reused) parallel dense
   matrix.  The column index set is gathered onto every process; each process
   copies its own rows.  No parallel redistribution: the submatrix must have
   the same row partitioning as A. */
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process needs the full column selection since it stores all columns */
  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time.
     NOTE(review): bv is advanced contiguously, which assumes the target's
     local block is stored with lda == nrows; with MAT_REUSE_MATRIX this
     should be verified against the reused matrix -- confirm */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B = newmat;
  PetscFunctionReturn(0);
}
328
MatDenseRestoreArray_MPIDense(Mat A,PetscScalar ** array)329 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
330 {
331 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
332 PetscErrorCode ierr;
333
334 PetscFunctionBegin;
335 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
336 PetscFunctionReturn(0);
337 }
338
MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar ** array)339 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
340 {
341 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
342 PetscErrorCode ierr;
343
344 PetscFunctionBegin;
345 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
346 PetscFunctionReturn(0);
347 }
348
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar ** array)349 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
350 {
351 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
352 PetscErrorCode ierr;
353
354 PetscFunctionBegin;
355 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
356 PetscFunctionReturn(0);
357 }
358
/* Begin assembly: start the scatter of stashed off-process values toward
   their owning processes.  Nothing to do when stashing is disabled. */
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
373
/* Finish assembly: drain the stash messages (inserting received off-process
   values locally), assemble the inner matrix, and on first final assembly set
   up the parallel multiply machinery. */
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  /* one-time setup of the scatter (lvec/Mvctx) used by MatMult and friends */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
412
MatZeroEntries_MPIDense(Mat A)413 PetscErrorCode MatZeroEntries_MPIDense(Mat A)
414 {
415 PetscErrorCode ierr;
416 Mat_MPIDense *l = (Mat_MPIDense*)A->data;
417
418 PetscFunctionBegin;
419 ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
420 PetscFunctionReturn(0);
421 }
422
/* Zero the given (global) rows, optionally placing diag on the diagonal and
   fixing the right-hand side b so that b[r] = diag*x[r] for each zeroed row. */
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  /* zero the rows in the local block (diagonal handled separately below) */
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    /* note: this sets the WHOLE diagonal to diag, not only the zeroed rows */
    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
455
456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
460
/* yy = mat*xx.  Broadcast the global entries of xx into the local work vector
   via the SF (memtype-aware for host/device arrays), then apply the local
   sequential multiply. */
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace_Internal(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  /* begin/end must bracket the communication before the local multiply */
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
479
/* zz = yy + mat*xx.  Same communication pattern as MatMult_MPIDense, with the
   local multiply-add doing the accumulation. */
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace_Internal(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
498
/* yy = A^T*xx.  Apply the local transpose multiply into the work vector, then
   sum-reduce the partial results onto the owners of yy via the SF. */
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  /* yy accumulates reduced contributions, so it must start from zero */
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace_Internal(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
518
/* zz = yy + A^T*xx.  Seed zz with yy, then sum-reduce the local transpose
   multiply's partial results into zz via the SF. */
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace_Internal(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
538
MatGetDiagonal_MPIDense(Mat A,Vec v)539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
540 {
541 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
542 PetscErrorCode ierr;
543 PetscInt lda,len,i,n,m = A->rmap->n,radd;
544 PetscScalar *x,zero = 0.0;
545 const PetscScalar *av;
546
547 PetscFunctionBegin;
548 ierr = VecSet(v,zero);CHKERRQ(ierr);
549 ierr = VecGetArray(v,&x);CHKERRQ(ierr);
550 ierr = VecGetSize(v,&n);CHKERRQ(ierr);
551 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
552 len = PetscMin(a->A->rmap->n,a->A->cmap->n);
553 radd = A->rmap->rstart*m;
554 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
555 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
556 for (i=0; i<len; i++) {
557 x[i] = av[radd + i*lda + i];
558 }
559 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
560 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
561 PetscFunctionReturn(0);
562 }
563
/* Destroy the parallel dense matrix: release the stash, inner objects, and
   private data, then detach every composed method so the object can be safely
   retyped or freed. */
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* destroying with outstanding views would free storage still in use */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  /* detach all composed methods registered at creation/conversion time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
621
622 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
623
624 #include <petscdraw.h>
MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)625 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
626 {
627 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
628 PetscErrorCode ierr;
629 PetscMPIInt rank;
630 PetscViewerType vtype;
631 PetscBool iascii,isdraw;
632 PetscViewer sviewer;
633 PetscViewerFormat format;
634
635 PetscFunctionBegin;
636 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
637 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
638 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
639 if (iascii) {
640 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
641 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
642 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
643 MatInfo info;
644 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
645 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
646 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
647 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
648 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
649 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
650 PetscFunctionReturn(0);
651 } else if (format == PETSC_VIEWER_ASCII_INFO) {
652 PetscFunctionReturn(0);
653 }
654 } else if (isdraw) {
655 PetscDraw draw;
656 PetscBool isnull;
657
658 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
659 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
660 if (isnull) PetscFunctionReturn(0);
661 }
662
663 {
664 /* assemble the entire matrix onto first processor. */
665 Mat A;
666 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
667 PetscInt *cols;
668 PetscScalar *vals;
669
670 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
671 if (!rank) {
672 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
673 } else {
674 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
675 }
676 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
677 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
678 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
679 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
680
681 /* Copy the matrix ... This isn't the most efficient means,
682 but it's quick for now */
683 A->insertmode = INSERT_VALUES;
684
685 row = mat->rmap->rstart;
686 m = mdn->A->rmap->n;
687 for (i=0; i<m; i++) {
688 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
689 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
690 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
691 row++;
692 }
693
694 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
695 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
696 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
697 if (!rank) {
698 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
699 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
700 }
701 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
702 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
703 ierr = MatDestroy(&A);CHKERRQ(ierr);
704 }
705 PetscFunctionReturn(0);
706 }
707
MatView_MPIDense(Mat mat,PetscViewer viewer)708 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
709 {
710 PetscErrorCode ierr;
711 PetscBool iascii,isbinary,isdraw,issocket;
712
713 PetscFunctionBegin;
714 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
715 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
716 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
717 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
718
719 if (iascii || issocket || isdraw) {
720 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
721 } else if (isbinary) {
722 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
723 }
724 PetscFunctionReturn(0);
725 }
726
MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo * info)727 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
728 {
729 Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
730 Mat mdn = mat->A;
731 PetscErrorCode ierr;
732 PetscLogDouble isend[5],irecv[5];
733
734 PetscFunctionBegin;
735 info->block_size = 1.0;
736
737 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);
738
739 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
740 isend[3] = info->memory; isend[4] = info->mallocs;
741 if (flag == MAT_LOCAL) {
742 info->nz_used = isend[0];
743 info->nz_allocated = isend[1];
744 info->nz_unneeded = isend[2];
745 info->memory = isend[3];
746 info->mallocs = isend[4];
747 } else if (flag == MAT_GLOBAL_MAX) {
748 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
749
750 info->nz_used = irecv[0];
751 info->nz_allocated = irecv[1];
752 info->nz_unneeded = irecv[2];
753 info->memory = irecv[3];
754 info->mallocs = irecv[4];
755 } else if (flag == MAT_GLOBAL_SUM) {
756 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
757
758 info->nz_used = irecv[0];
759 info->nz_allocated = irecv[1];
760 info->nz_unneeded = irecv[2];
761 info->memory = irecv[3];
762 info->mallocs = irecv[4];
763 }
764 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
765 info->fill_ratio_needed = 0;
766 info->factor_mallocs = 0;
767 PetscFunctionReturn(0);
768 }
769
MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)770 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
771 {
772 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
773 PetscErrorCode ierr;
774
775 PetscFunctionBegin;
776 switch (op) {
777 case MAT_NEW_NONZERO_LOCATIONS:
778 case MAT_NEW_NONZERO_LOCATION_ERR:
779 case MAT_NEW_NONZERO_ALLOCATION_ERR:
780 MatCheckPreallocated(A,1);
781 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
782 break;
783 case MAT_ROW_ORIENTED:
784 MatCheckPreallocated(A,1);
785 a->roworiented = flg;
786 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
787 break;
788 case MAT_NEW_DIAGONALS:
789 case MAT_KEEP_NONZERO_PATTERN:
790 case MAT_USE_HASH_TABLE:
791 case MAT_SORTED_FULL:
792 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
793 break;
794 case MAT_IGNORE_OFF_PROC_ENTRIES:
795 a->donotstash = flg;
796 break;
797 case MAT_SYMMETRIC:
798 case MAT_STRUCTURALLY_SYMMETRIC:
799 case MAT_HERMITIAN:
800 case MAT_SYMMETRY_ETERNAL:
801 case MAT_IGNORE_LOWER_TRIANGULAR:
802 case MAT_IGNORE_ZERO_ENTRIES:
803 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
804 break;
805 default:
806 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
807 }
808 PetscFunctionReturn(0);
809 }
810
MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)811 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
812 {
813 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
814 const PetscScalar *l;
815 PetscScalar x,*v,*vv,*r;
816 PetscErrorCode ierr;
817 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;
818
819 PetscFunctionBegin;
820 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
821 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
822 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
823 if (ll) {
824 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
825 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
826 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
827 for (i=0; i<m; i++) {
828 x = l[i];
829 v = vv + i;
830 for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
831 }
832 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
833 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
834 }
835 if (rr) {
836 const PetscScalar *ar;
837
838 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
839 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
840 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
841 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
842 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
843 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
844 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
845 for (i=0; i<n; i++) {
846 x = r[i];
847 v = vv + i*lda;
848 for (j=0; j<m; j++) (*v++) *= x;
849 }
850 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
851 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
852 }
853 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
854 PetscFunctionReturn(0);
855 }
856
/*
   MatNorm_MPIDense - Computes a matrix norm of a parallel dense matrix.

   Frobenius: sum of squared local entries, summed across ranks, then sqrt.
   1-norm:    per-column absolute sums reduced across ranks, then the max.
   Infinity:  local row norms computed by the sequential block, max-reduced.
   2-norm is not supported in parallel.

   NOTE(review): the Frobenius and 1-norm loops walk the raw array
   contiguously for cmap->n * rmap->n entries, which assumes the local
   storage has no padding (lda equals the local row count) -- TODO confirm
   this invariant holds for all callers.
*/
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    /* single rank: the sequential implementation handles every norm type */
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* tmp[j] accumulates the local contribution to column j's absolute sum;
         the local block spans all A->cmap->N columns */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
905
/*
   MatTranspose_MPIDense - Builds the transpose of a parallel dense matrix.

   For MAT_INITIAL_MATRIX and MAT_INPLACE_MATRIX a fresh matrix B with the
   transposed layout is created; for MAT_REUSE_MATRIX the caller-supplied
   *matout is filled in.  Each local column j (global column index, since the
   local block holds all N columns) becomes global row j of B, inserted via
   MatSetValues with the owned global row indices as column indices.
   MAT_INPLACE_MATRIX finishes by merging B's contents back into A's header.

   NOTE(review): MatDenseRestoreArrayRead is passed the pointer after it has
   been advanced by n*lda entries -- presumably harmless since restore only
   invalidates the pointer, but verify against the MatDense contract.
*/
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* transposed sizes: local rows/cols swap, global N x M */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  /* rwork = the global row indices this rank owns in A */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of A (contiguous, length m) becomes row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B's and destroy the shell of B */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
943
944 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
945 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
946
MatSetUp_MPIDense(Mat A)947 PetscErrorCode MatSetUp_MPIDense(Mat A)
948 {
949 PetscErrorCode ierr;
950
951 PetscFunctionBegin;
952 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
953 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
954 if (!A->preallocated) {
955 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
956 }
957 PetscFunctionReturn(0);
958 }
959
MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)960 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
961 {
962 PetscErrorCode ierr;
963 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
964
965 PetscFunctionBegin;
966 ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
967 PetscFunctionReturn(0);
968 }
969
MatConjugate_MPIDense(Mat mat)970 PetscErrorCode MatConjugate_MPIDense(Mat mat)
971 {
972 Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
973 PetscErrorCode ierr;
974
975 PetscFunctionBegin;
976 ierr = MatConjugate(a->A);CHKERRQ(ierr);
977 PetscFunctionReturn(0);
978 }
979
MatRealPart_MPIDense(Mat A)980 PetscErrorCode MatRealPart_MPIDense(Mat A)
981 {
982 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
983 PetscErrorCode ierr;
984
985 PetscFunctionBegin;
986 ierr = MatRealPart(a->A);CHKERRQ(ierr);
987 PetscFunctionReturn(0);
988 }
989
MatImaginaryPart_MPIDense(Mat A)990 PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
991 {
992 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
993 PetscErrorCode ierr;
994
995 PetscFunctionBegin;
996 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
997 PetscFunctionReturn(0);
998 }
999
MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)1000 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
1001 {
1002 PetscErrorCode ierr;
1003 Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1004
1005 PetscFunctionBegin;
1006 if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1007 if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1008 ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
1009 PetscFunctionReturn(0);
1010 }
1011
1012 PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1013
MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal * norms)1014 PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1015 {
1016 PetscErrorCode ierr;
1017 PetscInt i,n;
1018 Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1019 PetscReal *work;
1020
1021 PetscFunctionBegin;
1022 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
1023 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
1024 ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
1025 if (type == NORM_2) {
1026 for (i=0; i<n; i++) work[i] *= work[i];
1027 }
1028 if (type == NORM_INFINITY) {
1029 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
1030 } else {
1031 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
1032 }
1033 ierr = PetscFree(work);CHKERRQ(ierr);
1034 if (type == NORM_2) {
1035 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1036 }
1037 PetscFunctionReturn(0);
1038 }
1039
1040 #if defined(PETSC_HAVE_CUDA)
/*
   Exposes column 'col' of the parallel dense CUDA matrix as an MPI CUDA
   vector that aliases (does not copy) the device storage of the local block.
   Must be paired with MatDenseRestoreColumnVec(); only one column vector or
   submatrix may be outstanding at a time (tracked via vecinuse/matinuse).
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector with no backing array; the array is
       placed below and reset at restore time */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  /* offset col*lda selects the column in the (column-major) dense storage */
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1061
/*
   Releases the column vector handed out by MatDenseGetColumnVec_MPIDenseCUDA:
   returns the device array to the local block, detaches it from the cached
   wrapper vector, and clears the in-use marker.  *v is nulled so the caller
   cannot keep using the alias.
*/
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}
1076
/*
   Read-only variant of MatDenseGetColumnVec_MPIDenseCUDA: same aliasing of
   column 'col', but the device array is acquired read-only and the wrapper
   vector is read-locked so callers cannot modify it.
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; the array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1098
/*
   Releases a column vector obtained with the read-only getter: returns the
   device array, pops the read lock, detaches the array from the cached
   wrapper, and clears the in-use marker.  *v is nulled for the caller.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}
1114
/*
   Write-only variant of MatDenseGetColumnVec_MPIDenseCUDA: the device array
   is acquired write-only (previous contents need not be valid).  Otherwise
   identical aliasing and bookkeeping.
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the wrapper vector; the array is placed below */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;  /* col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}
1135
/*
   Releases a column vector obtained with the write-only getter: returns the
   device array write-side, detaches it from the cached wrapper vector, and
   clears the in-use marker.  *v is nulled for the caller.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}
1150
MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A,const PetscScalar * a)1151 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1152 {
1153 Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1154 PetscErrorCode ierr;
1155
1156 PetscFunctionBegin;
1157 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1158 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1159 ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
1160 PetscFunctionReturn(0);
1161 }
1162
MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)1163 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1164 {
1165 Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1166 PetscErrorCode ierr;
1167
1168 PetscFunctionBegin;
1169 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1170 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1171 ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
1172 PetscFunctionReturn(0);
1173 }
1174
MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A,const PetscScalar * a)1175 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1176 {
1177 Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1178 PetscErrorCode ierr;
1179
1180 PetscFunctionBegin;
1181 if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1182 if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1183 ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
1184 PetscFunctionReturn(0);
1185 }
1186
MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A,PetscScalar ** a)1187 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1188 {
1189 Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1190 PetscErrorCode ierr;
1191
1192 PetscFunctionBegin;
1193 ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
1194 PetscFunctionReturn(0);
1195 }
1196
MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A,PetscScalar ** a)1197 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1198 {
1199 Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1200 PetscErrorCode ierr;
1201
1202 PetscFunctionBegin;
1203 ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
1204 PetscFunctionReturn(0);
1205 }
1206
MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A,const PetscScalar ** a)1207 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1208 {
1209 Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1210 PetscErrorCode ierr;
1211
1212 PetscFunctionBegin;
1213 ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
1214 PetscFunctionReturn(0);
1215 }
1216
MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A,const PetscScalar ** a)1217 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1218 {
1219 Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1220 PetscErrorCode ierr;
1221
1222 PetscFunctionBegin;
1223 ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
1224 PetscFunctionReturn(0);
1225 }
1226
MatDenseCUDAGetArray_MPIDenseCUDA(Mat A,PetscScalar ** a)1227 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1228 {
1229 Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1230 PetscErrorCode ierr;
1231
1232 PetscFunctionBegin;
1233 ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
1234 PetscFunctionReturn(0);
1235 }
1236
MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A,PetscScalar ** a)1237 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1238 {
1239 Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1240 PetscErrorCode ierr;
1241
1242 PetscFunctionBegin;
1243 ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
1244 PetscFunctionReturn(0);
1245 }
1246
1247 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1248 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1249 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1250 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1251 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1252 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1253 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
1254
/*
   MatBindToCPU_MPIDenseCUDA - Binds (or unbinds) the matrix to the CPU.

   Propagates the binding to the local block and swaps the composed
   MatDenseGet/RestoreColumnVec* implementations between the CUDA and host
   variants.  When unbinding (bind == PETSC_FALSE) any cached column vector
   or submatrix of the wrong (non-CUDA) type is destroyed so it will be
   recreated with the right type on next use.

   NOTE(review): when binding TO the CPU the cached cvec/cmat are not
   destroyed even if they are CUDA objects -- presumably intentional so the
   device copies survive a temporary bind; verify against callers.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers of the wrong type; they are recreated lazily */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    /* install the host implementations of the column-vector accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1297
MatMPIDenseCUDASetPreallocation(Mat A,PetscScalar * d_data)1298 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
1299 {
1300 Mat_MPIDense *d = (Mat_MPIDense*)A->data;
1301 PetscErrorCode ierr;
1302 PetscBool iscuda;
1303
1304 PetscFunctionBegin;
1305 PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1306 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1307 if (!iscuda) PetscFunctionReturn(0);
1308 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1309 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1310 if (!d->A) {
1311 ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
1312 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
1313 ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
1314 }
1315 ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
1316 ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
1317 A->preallocated = PETSC_TRUE;
1318 PetscFunctionReturn(0);
1319 }
1320 #endif
1321
MatSetRandom_MPIDense(Mat x,PetscRandom rctx)1322 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1323 {
1324 Mat_MPIDense *d = (Mat_MPIDense*)x->data;
1325 PetscErrorCode ierr;
1326
1327 PetscFunctionBegin;
1328 ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1329 PetscFunctionReturn(0);
1330 }
1331
/* A dense matrix stores every entry explicitly, so a diagonal entry can
   never be missing; d is never written. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1338
1339 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1340 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1341 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1342 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1343 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1344 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1345
1346 /* -------------------------------------------------------------------*/
/* Function-pointer table for MATMPIDENSE: slot k corresponds to the k-th
   operation in struct _MatOps (the numeric comments mark every fifth slot);
   NULL slots fall back to MatOps defaults or raise "not supported". */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        NULL,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnNorms_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};
1496
/*
   MatMPIDenseSetPreallocation_MPIDense - Allocates storage for the parallel
   dense matrix.

   data - optional user-provided array for the local values (NULL lets PETSc
          allocate); ownership semantics follow MatSeqDenseSetPreallocation.

   The local block is a sequential dense matrix holding this rank's rows of
   every global column; it is created lazily here and typed to match the
   parallel matrix (CUDA vs host).
*/
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    /* local block: rmap->n rows by cmap->N (all) columns */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  /* pick the sequential type that matches the parallel type */
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1518
1519 #if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - Converts a MATMPIDENSE matrix to
   MATELEMENTAL.

   Each rank inserts its full local block (its rows times all N columns) into
   the Elemental matrix, which redistributes the entries internally.
   MAT_REUSE_MATRIX zeroes and refills the caller's matrix; MAT_INPLACE_MATRIX
   replaces A's header with the converted matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    /* values are supplied column-by-column below */
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  /* global indices of the owned rows and of every column */
  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
1558 #endif
1559
MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar ** vals)1560 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1561 {
1562 Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1563 PetscErrorCode ierr;
1564
1565 PetscFunctionBegin;
1566 ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1567 PetscFunctionReturn(0);
1568 }
1569
MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar ** vals)1570 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1571 {
1572 Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1573 PetscErrorCode ierr;
1574
1575 PetscFunctionBegin;
1576 ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1577 PetscFunctionReturn(0);
1578 }
1579
MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)1580 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1581 {
1582 PetscErrorCode ierr;
1583 Mat_MPIDense *mat;
1584 PetscInt m,nloc,N;
1585
1586 PetscFunctionBegin;
1587 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1588 ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1589 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1590 PetscInt sum;
1591
1592 if (n == PETSC_DECIDE) {
1593 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1594 }
1595 /* Check sum(n) = N */
1596 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1597 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1598
1599 ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1600 }
1601
1602 /* numeric phase */
1603 mat = (Mat_MPIDense*)(*outmat)->data;
1604 ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1605 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607 PetscFunctionReturn(0);
1608 }
1609
1610 #if defined(PETSC_HAVE_CUDA)
/*
   MatConvert_MPIDenseCUDA_MPIDense - Converts a MATMPIDENSECUDA matrix back to
   a host-only MATMPIDENSE matrix.

   The CUDA-specific composed methods and product hooks are detached (set to
   NULL), the default vector type is reset to VECSTANDARD, and the inner
   sequential block is converted to MATSEQDENSE.  The call order below is
   significant: bind-to-CPU must happen while the matrix is still CUDA-typed.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat aliases M and is converted in place */

  B    = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove all CUDA-only composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the inner sequential block to the host type and rebuild the mult machinery */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
1652
/*
   MatConvert_MPIDense_MPIDenseCUDA - Converts a host MATMPIDENSE matrix to
   MATMPIDENSECUDA.

   Mirrors MatConvert_MPIDenseCUDA_MPIDense: the CUDA-specific composed
   methods and product hooks are installed, the default vector type becomes
   VECCUDA, and the inner sequential block is converted to MATSEQDENSECUDA.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX: *newmat aliases M and is converted in place */

  B    = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  /* install CUDA-specific composed methods */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    /* host and device copies agree immediately after conversion */
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
1697 #endif
1698
MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec * v)1699 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1700 {
1701 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1702 PetscErrorCode ierr;
1703 PetscInt lda;
1704
1705 PetscFunctionBegin;
1706 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1707 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1708 if (!a->cvec) {
1709 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1710 }
1711 a->vecinuse = col + 1;
1712 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1713 ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1714 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1715 *v = a->cvec;
1716 PetscFunctionReturn(0);
1717 }
1718
MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec * v)1719 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1720 {
1721 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1722 PetscErrorCode ierr;
1723
1724 PetscFunctionBegin;
1725 if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1726 if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1727 a->vecinuse = 0;
1728 ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1729 ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1730 *v = NULL;
1731 PetscFunctionReturn(0);
1732 }
1733
MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec * v)1734 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1735 {
1736 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1737 PetscErrorCode ierr;
1738 PetscInt lda;
1739
1740 PetscFunctionBegin;
1741 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1742 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1743 if (!a->cvec) {
1744 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1745 }
1746 a->vecinuse = col + 1;
1747 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1748 ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1749 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1750 ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
1751 *v = a->cvec;
1752 PetscFunctionReturn(0);
1753 }
1754
MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec * v)1755 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1756 {
1757 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1758 PetscErrorCode ierr;
1759
1760 PetscFunctionBegin;
1761 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1762 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1763 a->vecinuse = 0;
1764 ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
1765 ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
1766 ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1767 *v = NULL;
1768 PetscFunctionReturn(0);
1769 }
1770
MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec * v)1771 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1772 {
1773 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1774 PetscErrorCode ierr;
1775 PetscInt lda;
1776
1777 PetscFunctionBegin;
1778 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1779 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1780 if (!a->cvec) {
1781 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1782 }
1783 a->vecinuse = col + 1;
1784 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1785 ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1786 ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1787 *v = a->cvec;
1788 PetscFunctionReturn(0);
1789 }
1790
MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec * v)1791 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1792 {
1793 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1794 PetscErrorCode ierr;
1795
1796 PetscFunctionBegin;
1797 if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1798 if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1799 a->vecinuse = 0;
1800 ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1801 ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1802 *v = NULL;
1803 PetscFunctionReturn(0);
1804 }
1805
/*
   MatDenseGetSubMatrix_MPIDense - Returns a parallel dense matrix view of
   columns [cbegin,cend) of A, sharing A's storage.

   The view matrix a->cmat is created lazily and cached; its column layout is
   rebuilt whenever the requested column count changes.  Only one sub-matrix
   may be outstanding at a time (tracked via a->matinuse).
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first request: create the cached view matrix, sharing A's row layout */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* cached view has a different column count: rebuild only its column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* the view's local block is a sub-matrix view of A's local block */
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* +1 so cbegin == 0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}
1845
MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat * v)1846 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
1847 {
1848 Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1849 Mat_MPIDense *c;
1850 PetscErrorCode ierr;
1851
1852 PetscFunctionBegin;
1853 if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
1854 if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
1855 if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
1856 a->matinuse = 0;
1857 c = (Mat_MPIDense*)a->cmat->data;
1858 ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
1859 *v = NULL;
1860 PetscFunctionReturn(0);
1861 }
1862
/*
   MatCreate_MPIDense - Type constructor for MATMPIDENSE: allocates the
   implementation data, installs the operations table, and composes all
   type-specific methods.  Preallocation happens later via
   MatMPIDenseSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* compose the dense-matrix access and column/sub-matrix view methods */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* optional conversions, compiled in only when the target package is available */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1926
1927 /*MC
1928 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1929
1930 Options Database Keys:
1931 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1932
1933 Level: beginner
1934
.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA
1936
1937 M*/
#if defined(PETSC_HAVE_CUDA)
/*
   MatCreate_MPIDenseCUDA - Type constructor for MATMPIDENSECUDA: builds a
   host MATMPIDENSE first, then converts it in place to the CUDA type.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* fail early with a clear error if no usable CUDA device is present */
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
1950
1951 /*MC
1952 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1953
1954 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1955 and MATMPIDENSE otherwise.
1956
1957 Options Database Keys:
1958 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1959
1960 Level: beginner
1961
1962
1963 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1964 M*/
1965
1966 /*MC
1967 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1968
1969 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1970 and MATMPIDENSECUDA otherwise.
1971
1972 Options Database Keys:
1973 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1974
1975 Level: beginner
1976
1977 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1978 M*/
1979
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation only if it exists (no-op otherwise) */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2011
/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the newly placed host array is now the authoritative copy */
#endif
  PetscFunctionReturn(0);
}
2045
/*@
   MatDenseResetArray - Resets the matrix array to that which it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2072
/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* the replacement host array is now the authoritative copy */
#endif
  PetscFunctionReturn(0);
}
2105
2106 #if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the newly placed device array is now the authoritative copy */
  PetscFunctionReturn(0);
}
2137
/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that which it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2164
/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* the replacement device array is now the authoritative copy */
  PetscFunctionReturn(0);
}
2196
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2225
2226 /*@C
2227 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2228
2229 Not Collective
2230
2231 Input Parameters:
2232 + A - the matrix
2233 - array - the GPU array in column major order
2234
2235 Notes:
2236
2237 Level: developer
2238
2239 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2240 @*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the caller wrote through the device pointer: the GPU copy is now the authoritative one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2252
2253 /*@C
2254 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2255
2256 Not Collective
2257
2258 Input Parameters:
2259 . A - the matrix
2260
  Output Parameter:
2262 . array - the GPU array in column major order
2263
2264 Notes:
2265 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2266
2267 Level: developer
2268
2269 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2270 @*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read-only access: no object-state increase and no offload-mask change */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2280
2281 /*@C
2282 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2283
2284 Not Collective
2285
2286 Input Parameters:
2287 + A - the matrix
2288 - array - the GPU array in column major order
2289
2290 Notes:
2291 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2292
2293 Level: developer
2294
2295 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2296 @*/
MatDenseCUDARestoreArrayRead(Mat A,const PetscScalar ** a)2297 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2298 {
2299 PetscErrorCode ierr;
2300
2301 PetscFunctionBegin;
2302 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
2303 PetscFunctionReturn(0);
2304 }
2305
2306 /*@C
2307 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2308
2309 Not Collective
2310
2311 Input Parameters:
2312 . A - the matrix
2313
  Output Parameter:
2315 . array - the GPU array in column major order
2316
2317 Notes:
2318 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2319
2320 Level: developer
2321
2322 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2323 @*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* read/write access: dispatch, then bump the object state since entries may be modified */
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2334
2335 /*@C
2336 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2337
2338 Not Collective
2339
2340 Input Parameters:
2341 + A - the matrix
2342 - array - the GPU array in column major order
2343
2344 Notes:
2345
2346 Level: developer
2347
2348 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2349 @*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* read/write access was granted, so treat the GPU copy as the valid one after restore */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
2361 #endif
2362
2363 /*@C
2364 MatCreateDense - Creates a matrix in dense format.
2365
2366 Collective
2367
2368 Input Parameters:
2369 + comm - MPI communicator
2370 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2371 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2372 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2373 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2374 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2375 to control all matrix memory allocation.
2376
2377 Output Parameter:
2378 . A - the matrix
2379
2380 Notes:
2381 The dense format is fully compatible with standard Fortran 77
2382 storage by columns.
2383
2384 The data input variable is intended primarily for Fortran programmers
2385 who wish to allocate their own matrix memory space. Most users should
2386 set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2387
2388 The user MUST specify either the local or global matrix dimensions
2389 (possibly both).
2390
2391 Level: intermediate
2392
2393 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2394 @*/
MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar * data,Mat * A)2395 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2396 {
2397 PetscErrorCode ierr;
2398 PetscMPIInt size;
2399
2400 PetscFunctionBegin;
2401 ierr = MatCreate(comm,A);CHKERRQ(ierr);
2402 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2403 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2404 if (size > 1) {
2405 PetscBool havedata = (PetscBool)!!data;
2406
2407 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
2408 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2409 ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
2410 if (havedata) { /* user provided data array, so no need to assemble */
2411 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
2412 (*A)->assembled = PETSC_TRUE;
2413 }
2414 } else {
2415 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
2416 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
2417 }
2418 PetscFunctionReturn(0);
2419 }
2420
2421 #if defined(PETSC_HAVE_CUDA)
2422 /*@C
2423 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2424
2425 Collective
2426
2427 Input Parameters:
2428 + comm - MPI communicator
2429 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2430 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2431 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2432 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2433 - data - optional location of GPU matrix data. Set data=NULL for PETSc
2434 to control matrix memory allocation.
2435
2436 Output Parameter:
2437 . A - the matrix
2438
2439 Notes:
2440
2441 Level: intermediate
2442
2443 .seealso: MatCreate(), MatCreateDense()
2444 @*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  /* data must be NULL on all ranks or non-NULL on all ranks; this checks that collectively
     (unlike MatCreateDense, which reconciles mixed NULL/non-NULL via an allreduce) */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) { /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2468 #endif
2469
/* Duplicate an MPI dense matrix; cpvalues decides whether values are copied. */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  /* create an empty matrix of the same type and parallel layout as A */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the row/column layouts with A by reference (no copy) */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  /* duplicate the local (sequential) block and set up the vector scatter for MatMult */
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}
2500
/* Load an MPI dense matrix from a viewer; supports binary and (when built) HDF5. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
2527
MatEqual_MPIDense(Mat A,Mat B,PetscBool * flag)2528 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2529 {
2530 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2531 Mat a,b;
2532 PetscBool flg;
2533 PetscErrorCode ierr;
2534
2535 PetscFunctionBegin;
2536 a = matA->A;
2537 b = matB->A;
2538 ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
2539 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2540 PetscFunctionReturn(0);
2541 }
2542
/* Product-destroy callback: frees the reuse data created by
   MatTransposeMatMultSymbolic_MPIDense_MPIDense. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2554
/* Product-destroy callback: frees the reuse data created by
   MatMatTransposeMultSymbolic_MPIDense_MPIDense (both algorithm variants). */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  /* recvcounts/recvdispls are only allocated by the allgatherv variant; PetscFree2 of NULL is a no-op */
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2566
/*
   Numeric phase of C = A^T * B for MPI dense matrices.
   Each rank forms the full product aseq^T * bseq of its local blocks (an
   N_A x N_B matrix), then the per-rank row blocks of C are summed across
   ranks with MPI_Reduce_scatter.
*/
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq; atb->atb is kept between calls for reuse */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: for each destination rank, pack its row
     block (column-major within the block) so reduce_scatter can split it */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray contributions into the local rows of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2611
/*
   Symbolic phase of C = A^T * B for MPI dense matrices: sizes C, makes it
   dense, and allocates the send buffer / receive counts consumed by the
   numeric phase's MPI_Reduce_scatter.  fill is unused for dense products.
*/
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must have the same row distribution for the local products to line up */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C: rows distributed like A's columns */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C: sendbuf holds the full N_A x N_B product,
     recvcounts[i] is rank i's share (its row count times cN) */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2651
/*
   Symbolic phase of C = A * B^T for MPI dense matrices: sizes C and allocates
   the communication buffers for whichever algorithm was selected
   ("allgatherv", alg 0, the default, or "cyclic", alg 1).
   fill is unused for dense products.
*/
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B: the contraction is over columns */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  /* a dedicated tag so the cyclic variant's messages cannot collide with other traffic */
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" — two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" — buf[0] holds local B^T, buf[1] the fully gathered B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                 = abt;
  C->product->destroy              = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric  = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
2704
/*
   Numeric phase of C = A * B^T, cyclic algorithm: the local block of B
   circulates around the ring of processes; on each of the size steps every
   rank multiplies its local A against the block currently held, writing the
   corresponding column block of C, while the next block is in flight
   (communication overlapped with the GEMM).
*/
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* use bv directly when it is already contiguous (lda == local rows);
     otherwise compact it into buf[0] */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors: blocks travel towards lower ranks */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn         = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; /* alternate buffers so send and recv never alias */
      ierr    = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr    = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T, written into the column block of C owned by recvisfrom */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2790
MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A,Mat B,Mat C)2791 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2792 {
2793 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2794 Mat_MatTransMultDense *abt;
2795 PetscErrorCode ierr;
2796 MPI_Comm comm;
2797 PetscMPIInt size;
2798 PetscScalar *cv, *sendbuf, *recvbuf;
2799 const PetscScalar *av,*bv;
2800 PetscInt blda,i,cK=A->cmap->N,k,j,bn;
2801 PetscScalar _DOne=1.0,_DZero=0.0;
2802 PetscBLASInt cm, cn, ck, alda, clda;
2803
2804 PetscFunctionBegin;
2805 MatCheckProduct(C,3);
2806 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2807 abt = (Mat_MatTransMultDense*)C->product->data;
2808 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2809 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2810 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
2811 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
2812 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
2813 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
2814 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
2815 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
2816 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
2817 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
2818 /* copy transpose of B into buf[0] */
2819 bn = B->rmap->n;
2820 sendbuf = abt->buf[0];
2821 recvbuf = abt->buf[1];
2822 for (k = 0, j = 0; j < bn; j++) {
2823 for (i = 0; i < cK; i++, k++) {
2824 sendbuf[k] = bv[i * blda + j];
2825 }
2826 }
2827 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2828 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
2829 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
2830 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
2831 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
2832 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2833 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
2834 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
2835 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
2836 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2837 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2838 PetscFunctionReturn(0);
2839 }
2840
MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)2841 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2842 {
2843 Mat_MatTransMultDense *abt;
2844 PetscErrorCode ierr;
2845
2846 PetscFunctionBegin;
2847 MatCheckProduct(C,3);
2848 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2849 abt = (Mat_MatTransMultDense*)C->product->data;
2850 switch (abt->alg) {
2851 case 1:
2852 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
2853 break;
2854 default:
2855 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
2856 break;
2857 }
2858 PetscFunctionReturn(0);
2859 }
2860
/* Product-destroy callback: frees the Elemental matrices cached for C = A*B reuse. */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2873
2874 #if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B: convert the operands to Elemental (reusing the
   cached Ae/Be), multiply there, and convert the result back into C. */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2890
MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)2891 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2892 {
2893 PetscErrorCode ierr;
2894 Mat Ae,Be,Ce;
2895 Mat_MatMultDense *ab;
2896
2897 PetscFunctionBegin;
2898 MatCheckProduct(C,4);
2899 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2900 /* check local size of A and B */
2901 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2902 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2903 }
2904
2905 /* create elemental matrices Ae and Be */
2906 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
2907 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2908 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
2909 ierr = MatSetUp(Ae);CHKERRQ(ierr);
2910 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2911
2912 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
2913 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
2914 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
2915 ierr = MatSetUp(Be);CHKERRQ(ierr);
2916 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2917
2918 /* compute symbolic Ce = Ae*Be */
2919 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
2920 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);
2921
2922 /* setup C */
2923 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2924 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
2925 ierr = MatSetUp(C);CHKERRQ(ierr);
2926
2927 /* create data structure for reuse Cdense */
2928 ierr = PetscNew(&ab);CHKERRQ(ierr);
2929 ab->Ae = Ae;
2930 ab->Be = Be;
2931 ab->Ce = Ce;
2932
2933 C->product->data = ab;
2934 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2935 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2936 PetscFunctionReturn(0);
2937 }
2938 #endif
2939 /* ----------------------------------------------- */
2940 #if defined(PETSC_HAVE_ELEMENTAL)
/* Configure C for the product C = A*B (available only with Elemental support). */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
2948 #endif
2949
/* Configure C for the product C = A^T*B; requires matching row distributions. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  /* the local A^T*B products only line up when A and B share a row layout */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}
2962
/* Configure C for the product C = A*B^T and select the algorithm
   ("allgatherv" default, or "cyclic") from the options database. */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg  = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option; option name depends on which API created the product */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
2997
/* Entry point for MatProductSetFromOptions on MPI dense matrices: dispatch on
   the product type. AB is available only when PETSc was built with Elemental;
   unsupported types fall through silently so other backends can be tried. */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
3021