
/*
   Basic functions for parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential
              matrix that stores the locally owned portion of the operator; for a MATSEQDENSE matrix it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

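    Example usage (a minimal sketch; B is borrowed from A, so the caller must not destroy it):
.vb
      Mat B;
      ierr = MatDenseGetLocalMatrix(A,&B);CHKERRQ(ierr);
      ierr = MatView(B,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
.ve
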
@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else {
    ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

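/*
   Returns (and caches, composed on A as "DiagonalBlock") a sequential dense matrix for
   the diagonal block of A. The block is not copied: it aliases the local array of A
   beginning at column rstart (the array+m*rstart offset below, which assumes the local
   leading dimension is the default m).
*/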
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar    *array;
  MPI_Comm       comm;
  PetscBool      flg;
  Mat            B;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr);
    ierr = MatCreate(comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr);
    ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr);
    *a   = B;
    ierr = MatDestroy(&B);CHKERRQ(ierr);
  } else *a = B;
  PetscFunctionReturn(0);
}

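/*
   Values for locally owned rows are inserted directly into the local MATSEQDENSE
   block; off-process rows are buffered in mat->stash and communicated during
   assembly. With column-oriented input (roworiented == PETSC_FALSE) the values
   array is addressed as v[i+j*m] instead of v[i*n+j].
*/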
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool      roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr);
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  }
  ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseResetArray(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

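/*
   Extracts a submatrix with the same row partitioning as A: the (parallel) column
   index set is gathered onto every process with ISAllGather(), and the selected
   entries are copied column by column out of the local dense array. Parallel
   redistribution of the rows is not supported here.
*/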
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat  = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode    ierr;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
  ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! Each index set should
     really be checked to confirm that it is OK; only a submatrix with the same
     row partitioning as the original matrix is supported. */

  ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test the row and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr);
    ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
    ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr);

  /* Assemble the matrices so that the correct flags are set */
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* Free work space */
  ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr);
  ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  *B   = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

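/*
   Assembly: MatAssemblyBegin() starts the scatter of the stashed off-process
   entries toward their owning ranks; MatAssemblyEnd() receives them, inserts
   each contiguous run of same-row values with a single MatSetValues_MPIDense()
   call, and then assembles the local block.
*/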
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense   *mdn=(Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt    n;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /*  wait on receives */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

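/*
   Zeros the given (global) rows: PetscLayoutMapLocal() maps them to locally
   owned indices; if x and b are provided, b is fixed up to diag*x on those
   rows, and a nonzero diag is then placed on the diagonal via MatDiagonalSet().
*/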
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense      *l = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  if (diag != 0.0) {
    Vec d;

    ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr);
    ierr = VecSet(d,diag);CHKERRQ(ierr);
    ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr);
    ierr = VecDestroy(&d);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

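/*
   y = A*x in two phases: the distributed entries of x are first broadcast into
   the length-N work vector lvec through the Mvctx PetscSF (the memtype-aware
   variant lets host and device buffers mix), and the product is then completed
   by the local SeqDense multiply. The transpose variants run the SF in the
   opposite direction, reducing local products into the result with MPIU_SUM.
*/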
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace_Internal(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecGetArrayReadInPlace_Internal(xx,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(mdn->lvec,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr);
  ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecSet(yy,0.0);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace_Internal(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(yy,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecGetArrayReadInPlace_Internal(a->lvec,&ax,&axmtype);CHKERRQ(ierr);
  ierr = VecGetArrayInPlace_Internal(zz,&ay,&aymtype);CHKERRQ(ierr);
  ierr = PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr);
  ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr);
  ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

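/*
   The diagonal entry of global row rstart+i sits in local row i, local column
   rstart+i, i.e. at column-major offset rstart*lda + i*(lda+1); the shortcut
   radd = rstart*m below assumes the default leading dimension lda == m.
*/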
PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a    = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  ierr = VecSet(v,zero);CHKERRQ(ierr);
  ierr = VecGetArray(v,&x);CHKERRQ(ierr);
  ierr = VecGetSize(v,&n);CHKERRQ(ierr);
  if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDestroy(&mdn->A);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr);
  ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr);

  ierr = PetscFree(mat->data);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr);
#if defined (PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);

  if (iascii || issocket || isdraw) {
    ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscErrorCode ierr;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

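/*
   Diagonal scaling: the left vector ll scales the locally owned rows in place
   (walking each row with stride lda); the right vector rr must first be
   broadcast into the length-N work vector lvec, since every process stores all
   N columns of its row block.
*/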
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vector non-conforming local size, %D != %D", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

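/*
   Norms: the Frobenius norm sums |a_ij|^2 locally and then reduces; the 1-norm
   (max column sum) reduces per-column absolute sums across processes before
   taking the max; the infinity norm (max row norm) is a max-reduction of the
   local row norms. The 2-norm is not supported in parallel.
*/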
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v);  v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

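/*
   Transpose: column j of the local row block (owned rows rstart..rstart+m-1,
   all N columns) becomes part of global row j of B; the entries are inserted
   with MatSetValues() against the owned row indices in rwork and communicated
   during the final assembly.
*/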
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a    = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
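/*
   The CUDA column accessors below reuse one cached placeholder vector a->cvec:
   each Get call places the device pointer of the requested column into it with
   VecCUDAPlaceArray() (recording the column in a->vecinuse), and the matching
   Restore call resets the array; read-only access additionally pushes and pops
   a read lock on the vector.
*/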
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) {
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
1302   PetscBool      iscuda;
1303 
1304   PetscFunctionBegin;
1305   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
1306   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1307   if (!iscuda) PetscFunctionReturn(0);
1308   ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1309   ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1310   if (!d->A) {
1311     ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
1312     ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
1313     ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
1314   }
1315   ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
1316   ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
1317   A->preallocated = PETSC_TRUE;
1318   PetscFunctionReturn(0);
1319 }
1320 #endif
1321 
MatSetRandom_MPIDense(Mat x,PetscRandom rctx)1322 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1323 {
1324   Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
MatMissingDiagonal_MPIDense(Mat A,PetscBool * missing,PetscInt * d)1332 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool  *missing,PetscInt *d)
1333 {
1334   PetscFunctionBegin;
1335   *missing = PETSC_FALSE;
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1340 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1341 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1342 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1343 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1344 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
1345 
1346 /* -------------------------------------------------------------------*/
1347 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
1348                                         MatGetRow_MPIDense,
1349                                         MatRestoreRow_MPIDense,
1350                                         MatMult_MPIDense,
1351                                 /*  4*/ MatMultAdd_MPIDense,
1352                                         MatMultTranspose_MPIDense,
1353                                         MatMultTransposeAdd_MPIDense,
1354                                         NULL,
1355                                         NULL,
1356                                         NULL,
1357                                 /* 10*/ NULL,
1358                                         NULL,
1359                                         NULL,
1360                                         NULL,
1361                                         MatTranspose_MPIDense,
1362                                 /* 15*/ MatGetInfo_MPIDense,
1363                                         MatEqual_MPIDense,
1364                                         MatGetDiagonal_MPIDense,
1365                                         MatDiagonalScale_MPIDense,
1366                                         MatNorm_MPIDense,
1367                                 /* 20*/ MatAssemblyBegin_MPIDense,
1368                                         MatAssemblyEnd_MPIDense,
1369                                         MatSetOption_MPIDense,
1370                                         MatZeroEntries_MPIDense,
1371                                 /* 24*/ MatZeroRows_MPIDense,
1372                                         NULL,
1373                                         NULL,
1374                                         NULL,
1375                                         NULL,
1376                                 /* 29*/ MatSetUp_MPIDense,
1377                                         NULL,
1378                                         NULL,
1379                                         MatGetDiagonalBlock_MPIDense,
1380                                         NULL,
1381                                 /* 34*/ MatDuplicate_MPIDense,
1382                                         NULL,
1383                                         NULL,
1384                                         NULL,
1385                                         NULL,
1386                                 /* 39*/ MatAXPY_MPIDense,
1387                                         MatCreateSubMatrices_MPIDense,
1388                                         NULL,
1389                                         MatGetValues_MPIDense,
1390                                         NULL,
1391                                 /* 44*/ NULL,
1392                                         MatScale_MPIDense,
1393                                         MatShift_Basic,
1394                                         NULL,
1395                                         NULL,
1396                                 /* 49*/ MatSetRandom_MPIDense,
1397                                         NULL,
1398                                         NULL,
1399                                         NULL,
1400                                         NULL,
1401                                 /* 54*/ NULL,
1402                                         NULL,
1403                                         NULL,
1404                                         NULL,
1405                                         NULL,
1406                                 /* 59*/ MatCreateSubMatrix_MPIDense,
1407                                         MatDestroy_MPIDense,
1408                                         MatView_MPIDense,
1409                                         NULL,
1410                                         NULL,
1411                                 /* 64*/ NULL,
1412                                         NULL,
1413                                         NULL,
1414                                         NULL,
1415                                         NULL,
1416                                 /* 69*/ NULL,
1417                                         NULL,
1418                                         NULL,
1419                                         NULL,
1420                                         NULL,
1421                                 /* 74*/ NULL,
1422                                         NULL,
1423                                         NULL,
1424                                         NULL,
1425                                         NULL,
1426                                 /* 79*/ NULL,
1427                                         NULL,
1428                                         NULL,
1429                                         NULL,
1430                                 /* 83*/ MatLoad_MPIDense,
1431                                         NULL,
1432                                         NULL,
1433                                         NULL,
1434                                         NULL,
1435                                         NULL,
1436                                 /* 89*/ NULL,
1437                                         NULL,
1438                                         NULL,
1439                                         NULL,
1440                                         NULL,
1441                                 /* 94*/ NULL,
1442                                         NULL,
1443                                         MatMatTransposeMultSymbolic_MPIDense_MPIDense,
1444                                         MatMatTransposeMultNumeric_MPIDense_MPIDense,
1445                                         NULL,
1446                                 /* 99*/ MatProductSetFromOptions_MPIDense,
1447                                         NULL,
1448                                         NULL,
1449                                         MatConjugate_MPIDense,
1450                                         NULL,
1451                                 /*104*/ NULL,
1452                                         MatRealPart_MPIDense,
1453                                         MatImaginaryPart_MPIDense,
1454                                         NULL,
1455                                         NULL,
1456                                 /*109*/ NULL,
1457                                         NULL,
1458                                         NULL,
1459                                         MatGetColumnVector_MPIDense,
1460                                         MatMissingDiagonal_MPIDense,
1461                                 /*114*/ NULL,
1462                                         NULL,
1463                                         NULL,
1464                                         NULL,
1465                                         NULL,
1466                                 /*119*/ NULL,
1467                                         NULL,
1468                                         NULL,
1469                                         NULL,
1470                                         NULL,
1471                                 /*124*/ NULL,
1472                                         MatGetColumnNorms_MPIDense,
1473                                         NULL,
1474                                         NULL,
1475                                         NULL,
1476                                 /*129*/ NULL,
1477                                         NULL,
1478                                         MatTransposeMatMultSymbolic_MPIDense_MPIDense,
1479                                         MatTransposeMatMultNumeric_MPIDense_MPIDense,
1480                                         NULL,
1481                                 /*134*/ NULL,
1482                                         NULL,
1483                                         NULL,
1484                                         NULL,
1485                                         NULL,
1486                                 /*139*/ NULL,
1487                                         NULL,
1488                                         NULL,
1489                                         NULL,
1490                                         NULL,
1491                                         MatCreateMPIMatConcatenateSeqMat_MPIDense,
1492                                 /*145*/ NULL,
1493                                         NULL,
1494                                         NULL
1495 };
1496 
MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar * data)1497 PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1498 {
1499   Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
1500   PetscBool      iscuda;
1501   PetscErrorCode ierr;
1502 
1503   PetscFunctionBegin;
1504   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1505   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
1506   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
1507   if (!a->A) {
1508     ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
1509     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
1510     ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
1511   }
1512   ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
1513   ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
1514   ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
1515   mat->preallocated = PETSC_TRUE;
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #if defined(PETSC_HAVE_ELEMENTAL)
MatConvert_MPIDense_Elemental(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1520 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1521 {
1522   Mat            mat_elemental;
1523   PetscErrorCode ierr;
1524   PetscScalar    *v;
1525   PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;
1526 
1527   PetscFunctionBegin;
1528   if (reuse == MAT_REUSE_MATRIX) {
1529     mat_elemental = *newmat;
1530     ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
1531   } else {
1532     ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
1533     ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
1534     ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
1535     ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
1536     ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1537   }
1538 
1539   ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
1540   for (i=0; i<N; i++) cols[i] = i;
1541   for (i=0; i<m; i++) rows[i] = rstart + i;
1542 
1543   /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
1544   ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
1545   ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
1546   ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547   ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548   ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
1549   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1550 
1551   if (reuse == MAT_INPLACE_MATRIX) {
1552     ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
1553   } else {
1554     *newmat = mat_elemental;
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 #endif
1559 
MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar ** vals)1560 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1561 {
1562   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar ** vals)1570 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1571 {
1572   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
1573   PetscErrorCode ierr;
1574 
1575   PetscFunctionBegin;
1576   ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 
MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat * outmat)1580 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
1581 {
1582   PetscErrorCode ierr;
1583   Mat_MPIDense   *mat;
1584   PetscInt       m,nloc,N;
1585 
1586   PetscFunctionBegin;
1587   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
1588   ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
1589   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
1590     PetscInt sum;
1591 
1592     if (n == PETSC_DECIDE) {
1593       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
1594     }
1595     /* Check sum(n) = N */
1596     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1597     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
1598 
1599     ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
1600   }
1601 
1602   /* numeric phase */
1603   mat = (Mat_MPIDense*)(*outmat)->data;
1604   ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1605   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 #if defined(PETSC_HAVE_CUDA)
MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat * newmat)1611 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1612 {
1613   Mat            B;
1614   Mat_MPIDense   *m;
1615   PetscErrorCode ierr;
1616 
1617   PetscFunctionBegin;
1618   if (reuse == MAT_INITIAL_MATRIX) {
1619     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1620   } else if (reuse == MAT_REUSE_MATRIX) {
1621     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1622   }
1623 
1624   B    = *newmat;
1625   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
1626   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1627   ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
1628   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
1629   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
1630   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
1632   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
1633   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1634   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
1635   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
1636   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
1637   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
1638   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
1639   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
1640   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
1641   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
1642   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
1643   m    = (Mat_MPIDense*)(B)->data;
1644   if (m->A) {
1645     ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1646     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1647   }
1648   B->ops->bindtocpu = NULL;
1649   B->offloadmask    = PETSC_OFFLOAD_CPU;
1650   PetscFunctionReturn(0);
1651 }
1652 
MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat * newmat)1653 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
1654 {
1655   Mat            B;
1656   Mat_MPIDense   *m;
1657   PetscErrorCode ierr;
1658 
1659   PetscFunctionBegin;
1660   if (reuse == MAT_INITIAL_MATRIX) {
1661     ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
1662   } else if (reuse == MAT_REUSE_MATRIX) {
1663     ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1664   }
1665 
1666   B    = *newmat;
1667   ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
1668   ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
1669   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
1670   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
1671   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1672   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
1673   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1674   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
1675   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
1676   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1677   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1678   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
1679   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
1680   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
1681   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1682   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
1683   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
1684   m    = (Mat_MPIDense*)(B)->data;
1685   if (m->A) {
1686     ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
1687     ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
1688     B->offloadmask = PETSC_OFFLOAD_BOTH;
1689   } else {
1690     B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1691   }
1692   ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);
1693 
1694   B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
1695   PetscFunctionReturn(0);
1696 }
1697 #endif
1698 
MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec * v)1699 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1700 {
1701   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1702   PetscErrorCode ierr;
1703   PetscInt       lda;
1704 
1705   PetscFunctionBegin;
1706   if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1707   if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1708   if (!a->cvec) {
1709     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
1710   }
1711   a->vecinuse = col + 1;
1712   ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
1713   ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1714   ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
1715   *v   = a->cvec;
1716   PetscFunctionReturn(0);
1717 }
1718 
MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec * v)1719 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1720 {
1721   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1726   if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1727   a->vecinuse = 0;
1728   ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
1729   ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
1730   *v   = NULL;
1731   PetscFunctionReturn(0);
1732 }
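
/*
   Usage sketch for the column-vector accessors above (illustrative only, not
   compiled: A is some MATMPIDENSE matrix and ierr a PetscErrorCode, both
   assumed declared by the caller):

     Vec v;
     ierr = MatDenseGetColumnVec(A,0,&v);CHKERRQ(ierr);     // borrow column 0 of A as a Vec
     ierr = VecScale(v,2.0);CHKERRQ(ierr);                  // operate on the column in place
     ierr = MatDenseRestoreColumnVec(A,0,&v);CHKERRQ(ierr); // restore before any other access

   Only one column may be borrowed at a time; the vecinuse guard above enforces this.
*/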

PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecRead() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVecWrite() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;
  *v = a->cmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c    = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}
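
/*
   Usage sketch for the submatrix accessors above (illustrative only; A and the
   column range are assumptions). The returned matrix shares storage with A, so
   it must be restored before A is accessed through any other accessor:

     Mat S;
     ierr = MatDenseGetSubMatrix(A,2,5,&S);CHKERRQ(ierr);   // columns [2,5) of A, no copy
     ierr = MatScale(S,0.5);CHKERRQ(ierr);
     ierr = MatDenseRestoreSubMatrix(A,&S);CHKERRQ(ierr);
*/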

PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr      = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr      = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATMPIDENSE, MATSEQDENSECUDA, MATDENSECUDA

M*/
#if defined(PETSC_HAVE_CUDA)
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscCUDAInitializeCheck();CHKERRQ(ierr);
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/
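
/*
   A minimal sketch of selecting this type at runtime (illustrative; comm, M, N
   are assumed to be defined by the caller):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);   // honors -mat_type dense
*/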

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
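
/*
   A minimal sketch of the preallocation call above (illustrative; comm, M, N
   are assumptions of the example):

     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);  // NULL: PETSc allocates the storage
*/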

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to the array it held before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode  MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
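
/*
   Sketch of the place/reset pairing (illustrative; myarray is assumed to be a
   user-provided column-major buffer large enough for the local block):

     ierr = MatDensePlaceArray(A,myarray);CHKERRQ(ierr);  // A now reads/writes myarray
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = MatDenseResetArray(A);CHKERRQ(ierr);          // back to the original storage
*/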

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode  MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
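
/*
   Sketch distinguishing replace from place (illustrative; lda and n stand for
   the local leading dimension and column count, assumptions of the example).
   Ownership of the buffer passes to the matrix:

     PetscScalar *newarr;
     ierr = PetscMalloc1((size_t)lda*n,&newarr);CHKERRQ(ierr);
     ierr = MatDenseReplaceArray(A,newarr);CHKERRQ(ierr);  // freed by MatDestroy(), not by the caller
*/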

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it held before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode  MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
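
/*
   Sketch of the CUDA place/reset pairing (illustrative; d_buf is assumed to be
   a device buffer obtained from cudaMalloc()):

     ierr = MatDenseCUDAPlaceArray(A,d_buf);CHKERRQ(ierr);  // A now uses the user's GPU buffer
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = MatDenseCUDAResetArray(A);CHKERRQ(ierr);        // back to the original GPU storage
*/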

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode  MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
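
/*
   Sketch of direct GPU access through the accessors above (illustrative; the
   kernel launch stands in for any user CUDA or cuBLAS code):

     PetscScalar *d_a;
     ierr = MatDenseCUDAGetArray(A,&d_a);CHKERRQ(ierr);     // valid device pointer, data up to date
     // ... launch a kernel or call cuBLAS on d_a ...
     ierr = MatDenseCUDARestoreArray(A,&d_a);CHKERRQ(ierr); // marks the GPU copy as current
*/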
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode  MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    PetscBool havedata = (PetscBool)!!data;

    ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr);
    ierr = MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr);
    if (havedata) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr);
    ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
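
/*
   Example usage (a minimal sketch): create a distributed dense matrix with
   PETSc-managed storage, fill the locally owned rows, and assemble. All
   calls are standard PETSc API; the sizes and values are illustrative only.

     Mat      A;
     PetscInt i,j,rstart,rend,N = 10;

     ierr = MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) {
         PetscScalar v = 1.0/(i+j+1);   // Hilbert-matrix entries, purely for illustration
         ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/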

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   If data is passed in, it must point to device (GPU) memory. Whether data is NULL must agree on all processes in comm, since the argument is checked collectively.

   Level: intermediate

.seealso: MatCreate(), MatCreateDense()
@*/
PetscErrorCode  MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr);
    ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
    if (data) {  /* user provided data array, so no need to assemble */
      ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr);
      (*A)->assembled = PETSC_TRUE;
    }
  } else {
    ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr);
    ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
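
/*
   Example usage (a minimal sketch): same calling sequence as MatCreateDense(),
   but the matrix storage lives on the GPU. With data=NULL PETSc allocates the
   device buffer itself; values can still be set through the usual host API
   (MatSetValues() and friends) before assembling.

     Mat      A;
     PetscInt N = 128;   // illustrative global size

     ierr = MatCreateDenseCUDA(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,N,N,NULL,&A);CHKERRQ(ierr);
     // ... MatSetValues() on the locally owned rows ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/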
#endif

static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = NULL;
  ierr    = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a       = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}
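
/*
   Example usage (a minimal sketch): load a dense matrix previously written
   with MatView() into a binary viewer. The file name is illustrative.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);   // MATMPIDENSE on >1 process
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);        // dispatches to MatLoad_MPIDense()
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/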

static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* pack atbarray into sendbuf, grouped by the destination process of each row block */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum the contributions from all processes into the locally owned rows of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
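
/*
   Sketch of the data movement above (illustrative sizes, 2 processes):
   atb->atb is the full N_A x N_B product of the local blocks; take cM=4,
   cN=2, ranges={0,2,4}. The packing loop reorders the column-major atbarray
   so that the rows owned by each process are contiguous:

     sendbuf = [ rows 0..1 of cols 0..1 | rows 2..3 of cols 0..1 ]
                  destined for rank 0       destined for rank 1

   MPI_Reduce_scatter() then sums the slice for rank r across all processes
   and leaves the result directly in rank r's local block of C.
*/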

static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse of C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
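
/*
   Example usage (a minimal sketch): C = A^T * B for two MATMPIDENSE matrices
   with matching row layouts; the symbolic and numeric phases above are both
   driven through this one user-level call.

     Mat C;

     ierr = MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     // reuse the cached product data on later calls with new values in A and B
     ierr = MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/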

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local sizes of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ? 0 : 1;

  /* set up matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse of C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
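
/*
   Example usage (a minimal sketch): C = A * B^T for two MATMPIDENSE matrices
   with the same local column layout. The algorithm ("allgatherv" or "cyclic")
   is selected in MatProductSetFromOptions_MPIDense_ABt() below.

     Mat C;

     ierr = MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/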

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* the previous round's sends and receives (if any) have completed, so both bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
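
/*
   Sketch of the cyclic schedule above (illustrative, 3 processes): each rank
   keeps its local A block in place and circulates the compacted B blocks
   around a ring (send to rank-1, receive from rank+1), overlapping the
   MPI_Isend/MPI_Irecv of the next block with the GEMM on the current one:

     round 0: rank r multiplies with B_r       while sending B_r     to r-1
     round 1: rank r multiplies with B_{r+1}   while sending B_{r+1} to r-1
     round 2: rank r multiplies with B_{r+2}   (last round, no communication)

   After size rounds every rank has filled all column blocks of its locally
   owned rows of C = A * B^T.
*/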

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av,*bv;
  PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bv[i * blda + j];
    }
  }
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr);
  if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local sizes of A and B: A's column layout must match B's row layout */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create elemental matrices Ae and Be */
  ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr);
  ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(Ae);CHKERRQ(ierr);
  ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr);
  ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr);
  ierr = MatSetUp(Be);CHKERRQ(ierr);
  ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);

  /* compute symbolic Ce = Ae*Be */
  ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr);
  ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr);

  /* set up C */
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse of C */
  ierr = PetscNew(&ab);CHKERRQ(ierr);
  ab->Ae = Ae;
  ab->Be = Be;
  ab->Ce = Ce;

  C->product->data    = ab;
  C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
#endif
/* ----------------------------------------------- */
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
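
/*
   Example (a minimal sketch): selecting the C = A * B^T algorithm from the
   options database, matching the keys registered above.

     ./app -matmattransmult_mpidense_mpidense_via cyclic     # MatMatTransposeMult() callers
     ./app -matproduct_abt_mpidense_mpidense_via allgatherv  # MatProduct API callers
*/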

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
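
/*
   Example usage (a minimal sketch): driving the dispatch above through the
   MatProduct API rather than the MatMatMult-style convenience calls.

     Mat C;

     ierr = MatProductCreate(A,B,NULL,&C);CHKERRQ(ierr);
     ierr = MatProductSetType(C,MATPRODUCT_AtB);CHKERRQ(ierr);
     ierr = MatProductSetFromOptions(C);CHKERRQ(ierr);   // ends up in MatProductSetFromOptions_MPIDense()
     ierr = MatProductSymbolic(C);CHKERRQ(ierr);
     ierr = MatProductNumeric(C);CHKERRQ(ierr);
*/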