1 #include <petsc/private/petscscalapack.h>  /*I "petscmat.h" I*/
2 
3 #define DEFAULT_BLOCKSIZE 64
4 
/*
    The variable Petsc_ScaLAPACK_keyval is an MPI attribute key that is
  attached to a communicator; the attribute's value is a Mat_ScaLAPACK_Grid.
*/
9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10 
MatView_ScaLAPACK(Mat A,PetscViewer viewer)11 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
12 {
13   PetscErrorCode    ierr;
14   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
15   PetscBool         iascii;
16   PetscViewerFormat format;
17   Mat               Adense;
18 
19   PetscFunctionBegin;
20   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
21   if (iascii) {
22     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
23     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
24       ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
25       ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
26       ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
27       ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
28       PetscFunctionReturn(0);
29     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
30       PetscFunctionReturn(0);
31     }
32   }
33   /* convert to dense format and call MatView() */
34   ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
35   ierr = MatView(Adense,viewer);CHKERRQ(ierr);
36   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
37   PetscFunctionReturn(0);
38 }
39 
MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo * info)40 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
41 {
42   PetscErrorCode ierr;
43   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
44   PetscLogDouble isend[2],irecv[2];
45 
46   PetscFunctionBegin;
47   info->block_size = 1.0;
48 
49   isend[0] = a->lld*a->locc;     /* locally allocated */
50   isend[1] = a->locr*a->locc;    /* used submatrix */
51   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
52     info->nz_allocated   = isend[0];
53     info->nz_used        = isend[1];
54   } else if (flag == MAT_GLOBAL_MAX) {
55     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
56     info->nz_allocated   = irecv[0];
57     info->nz_used        = irecv[1];
58   } else if (flag == MAT_GLOBAL_SUM) {
59     ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
60     info->nz_allocated   = irecv[0];
61     info->nz_used        = irecv[1];
62   }
63 
64   info->nz_unneeded       = 0;
65   info->assemblies        = A->num_ass;
66   info->mallocs           = 0;
67   info->memory            = ((PetscObject)A)->mem;
68   info->fill_ratio_given  = 0;
69   info->fill_ratio_needed = 0;
70   info->factor_mallocs    = 0;
71   PetscFunctionReturn(0);
72 }
73 
MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)74 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
75 {
76   PetscFunctionBegin;
77   switch (op) {
78     case MAT_NEW_NONZERO_LOCATIONS:
79     case MAT_NEW_NONZERO_LOCATION_ERR:
80     case MAT_NEW_NONZERO_ALLOCATION_ERR:
81     case MAT_SYMMETRIC:
82     case MAT_SORTED_FULL:
83     case MAT_HERMITIAN:
84       break;
85     default:
86       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
87   }
88   PetscFunctionReturn(0);
89 }
90 
/*
   Insert or add the dense row-major array vals[nr][nc] into A at global
   rows/cols. Locally owned entries are written directly into the local
   column-major array; off-process entries are stashed for MatAssemblyBegin/End.
   Negative row/column indices are skipped (standard PETSc convention).
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;
    ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr);  /* ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;
      ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
      /* map the global (row,col) to local indices and the owning process coordinates */
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* locally owned: update the column-major local array in place (lridx/lcidx are 1-based) */
        switch (imode) {
          case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
          case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
          default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: stash it; assembly will communicate it to the owner */
        if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
121 
/*
   Shared kernel for MatMult/MatMultTranspose/MatMultAdd/MatMultTransposeAdd:
   computes y = op(A)*x + beta*y with op(A) = A or A^T.
   The PETSc vectors x,y live in a 1d block distribution; they are
   redistributed to/from 2d block-cyclic layouts matching A's grid before and
   after the PBLASgemv call. When beta == 0.0 the incoming contents of y are
   ignored, so its redistribution into the 2d layout is skipped.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when its values contribute) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine: y2d = A^T*x2d + beta*y2d */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else {   /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when its values contribute) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine: y2d = A*x2d + beta*y2d */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
211 
MatMult_ScaLAPACK(Mat A,Vec x,Vec y)212 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
213 {
214   PetscErrorCode    ierr;
215   const PetscScalar *xarray;
216   PetscScalar       *yarray;
217 
218   PetscFunctionBegin;
219   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
220   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
221   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
222   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
223   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)227 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
228 {
229   PetscErrorCode    ierr;
230   const PetscScalar *xarray;
231   PetscScalar       *yarray;
232 
233   PetscFunctionBegin;
234   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
235   ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
236   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
237   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
238   ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
239   PetscFunctionReturn(0);
240 }
241 
MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)242 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
243 {
244   PetscErrorCode    ierr;
245   const PetscScalar *xarray;
246   PetscScalar       *zarray;
247 
248   PetscFunctionBegin;
249   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
250   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
251   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
252   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
253   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
254   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)258 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
259 {
260   PetscErrorCode    ierr;
261   const PetscScalar *xarray;
262   PetscScalar       *zarray;
263 
264   PetscFunctionBegin;
265   if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
266   ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
267   ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
268   ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
269   ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
270   ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)274 PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
275 {
276   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
277   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
278   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
279   PetscScalar   sone=1.0,zero=0.0;
280   PetscBLASInt  one=1;
281 
282   PetscFunctionBegin;
283   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
284   C->assembled = PETSC_TRUE;
285   PetscFunctionReturn(0);
286 }
287 
/*
   Symbolic phase of C = A*B: size and set up C as a ScaLAPACK matrix and
   register the numeric kernel. The ops entry is assigned after MatSetType,
   which installs the type's default function table. fill is ignored
   (the result is dense).
*/
PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
  PetscFunctionReturn(0);
}
299 
MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)300 static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
301 {
302   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
303   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
304   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
305   PetscScalar   sone=1.0,zero=0.0;
306   PetscBLASInt  one=1;
307 
308   PetscFunctionBegin;
309   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
310   C->assembled = PETSC_TRUE;
311   PetscFunctionReturn(0);
312 }
313 
MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)314 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
315 {
316   PetscErrorCode ierr;
317 
318   PetscFunctionBegin;
319   ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
320   ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
321   ierr = MatSetUp(C);CHKERRQ(ierr);
322   PetscFunctionReturn(0);
323 }
324 
325 /* --------------------------------------- */
/* Configure the MatProduct C = A*B: install the ScaLAPACK symbolic routines */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
333 
/* Configure the MatProduct C = A*B^T: install the ScaLAPACK symbolic routines */
static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
{
  PetscFunctionBegin;
  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}
341 
MatProductSetFromOptions_ScaLAPACK(Mat C)342 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
343 {
344   PetscErrorCode ierr;
345   Mat_Product    *product = C->product;
346 
347   PetscFunctionBegin;
348   switch (product->type) {
349     case MATPRODUCT_AB:
350       ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
351       break;
352     case MATPRODUCT_ABt:
353       ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
354       break;
355     default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
356   }
357   PetscFunctionReturn(0);
358 }
359 /* --------------------------------------- */
360 
/*
   Extract the diagonal of A into the 1d-distributed vector D. The diagonal is
   gathered entry by entry into a 2d-distributed auxiliary vector and then
   redistributed to D's layout. Two symmetric paths: when M <= N the diagonal
   is handled as a column (distributed over process rows), otherwise as a row
   (distributed over process columns).
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar       *darray,*d2d,v;
  const PetscInt    *ranges;
  PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  ierr = VecGetArray(D,&darray);CHKERRQ(ierr);

  if (A->rmap->N<=A->cmap->N) {   /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: elset only writes locally owned entries) */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: broadcast each A(j,j) along its process row, store into d2d(j,1) */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    ierr = PetscFree(d2d);CHKERRQ(ierr);

  } else {   /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: elset only writes locally owned entries) */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: broadcast each A(j,j) along its process column, store into d2d(1,j) */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    ierr = PetscFree(d2d);CHKERRQ(ierr);
  }

  ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
433 
/*
   Compute A = diag(L)*A*diag(R) (either vector may be NULL). Each scaling
   vector is redistributed from its 1d PETSc layout to the first process
   row/column of the 2d grid, broadcast across the grid with BLACS, and then
   applied to the local array.
   NOTE(review): the broadcasts use Cdgebs2d/Cdgebr2d, which are the double
   precision real BLACS routines; presumably this assumes PetscScalar is
   double — confirm for complex/single-precision builds.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {   /* right scaling: multiply column j by R[j] */
    ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: row 0 sends, all other rows receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling: each local column is scaled by its matching entry of d2d */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  if (L) {   /* left scaling: multiply row i by L[i] */
    ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: column 0 sends, all other columns receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: each local row is scaled by its matching entry of d2d */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
507 
/*
   A dense ScaLAPACK matrix stores every entry explicitly, so a diagonal
   entry can never be "missing"; d is left untouched.
*/
static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
514 
MatScale_ScaLAPACK(Mat X,PetscScalar a)515 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
516 {
517   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
518   PetscBLASInt  n,one=1;
519 
520   PetscFunctionBegin;
521   n = x->lld*x->locc;
522   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
523   PetscFunctionReturn(0);
524 }
525 
MatShift_ScaLAPACK(Mat X,PetscScalar alpha)526 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
527 {
528   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
529   PetscBLASInt  i,n;
530   PetscScalar   v;
531 
532   PetscFunctionBegin;
533   n = PetscMin(x->M,x->N);
534   for (i=1;i<=n;i++) {
535     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
536     v += alpha;
537     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
538   }
539   PetscFunctionReturn(0);
540 }
541 
MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)542 static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
543 {
544   PetscErrorCode ierr;
545   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
546   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
547   PetscBLASInt   one=1;
548   PetscScalar    beta=1.0;
549 
550   PetscFunctionBegin;
551   MatScaLAPACKCheckDistribution(Y,1,X,3);
552   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
553   ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
554   PetscFunctionReturn(0);
555 }
556 
MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)557 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
558 {
559   PetscErrorCode ierr;
560   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
561   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;
562 
563   PetscFunctionBegin;
564   ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
565   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
566   PetscFunctionReturn(0);
567 }
568 
MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat * B)569 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
570 {
571   Mat            Bs;
572   MPI_Comm       comm;
573   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;
574   PetscErrorCode ierr;
575 
576   PetscFunctionBegin;
577   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
578   ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
579   ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
580   ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
581   b = (Mat_ScaLAPACK*)Bs->data;
582   b->M    = a->M;
583   b->N    = a->N;
584   b->mb   = a->mb;
585   b->nb   = a->nb;
586   b->rsrc = a->rsrc;
587   b->csrc = a->csrc;
588   ierr = MatSetUp(Bs);CHKERRQ(ierr);
589   *B = Bs;
590   if (op == MAT_COPY_VALUES) {
591     ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
592   }
593   Bs->assembled = PETSC_TRUE;
594   PetscFunctionReturn(0);
595 }
596 
/*
   B = A^T. Uses the PBLAS transpose routine, which in complex builds
   conjugates as well, so a second elementwise pass undoes the conjugation
   to yield a plain (non-Hermitian) transpose. Only MAT_INITIAL_MATRIX is
   supported.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* result has swapped local/global dimensions */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
    ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
625 
MatConjugate_ScaLAPACK(Mat A)626 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
627 {
628   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
629   PetscInt      i,j;
630 
631   PetscFunctionBegin;
632   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
633   PetscFunctionReturn(0);
634 }
635 
MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat * B)636 static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
637 {
638   PetscErrorCode ierr;
639   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
640   Mat            Bs = *B;
641   PetscBLASInt   one=1;
642   PetscScalar    sone=1.0,zero=0.0;
643 
644   PetscFunctionBegin;
645   if (reuse == MAT_INITIAL_MATRIX) {
646     ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
647     ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
648     ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
649     ierr = MatSetUp(Bs);CHKERRQ(ierr);
650     *B = Bs;
651   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
652   b = (Mat_ScaLAPACK*)Bs->data;
653   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
654   Bs->assembled = PETSC_TRUE;
655   PetscFunctionReturn(0);
656 }
657 
/*
   Solve A*X = B using previously computed factors (A->factortype selects LU
   or Cholesky). B is copied into X, redistributed from its 1d layout into a
   2d column matching A's grid, solved in place with the corresponding
   ScaLAPACK driver, and redistributed back.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  ierr = VecCopy(B,X);CHKERRQ(ierr);   /* solve in place on a copy of the rhs */
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine matching the stored factorization */
  switch (A->factortype) {
    case MAT_FACTOR_LU:
      /* LU with partial pivoting: uses the pivot array from getrf */
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      /* Cholesky: lower-triangular factor */
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
      break;
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  ierr = PetscFree(x2d);CHKERRQ(ierr);
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
711 
MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)712 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
713 {
714   PetscErrorCode ierr;
715 
716   PetscFunctionBegin;
717   ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
718   ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  /* Solve A X = B with multiple right-hand sides; B and X must both be
     MATSCALAPACK and share the same block-cyclic distribution */
  ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
  if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  /* the ScaLAPACK solves overwrite the right-hand side, so copy B's local array into X first */
  ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);

  switch (A->factortype) {
    case MAT_FACTOR_LU:
      PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("getrs",info);
      break;
    case MAT_FACTOR_CHOLESKY:
      PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
      PetscCheckScaLapackInfo("potrs",info);
      break;
    default:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
      break;
  }
  PetscFunctionReturn(0);
}
753 
MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo * factorinfo)754 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
755 {
756   PetscErrorCode ierr;
757   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
758   PetscBLASInt   one=1,info;
759 
760   PetscFunctionBegin;
761   if (!a->pivots) {
762     ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
763     ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
764   }
765   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
766   PetscCheckScaLapackInfo("getrf",info);
767   A->factortype = MAT_FACTOR_LU;
768   A->assembled  = PETSC_TRUE;
769 
770   ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
771   ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo * info)775 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
776 {
777   PetscErrorCode ierr;
778 
779   PetscFunctionBegin;
780   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
781   ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
782   PetscFunctionReturn(0);
783 }
784 
MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo * info)785 static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
786 {
787   PetscFunctionBegin;
788   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
789   PetscFunctionReturn(0);
790 }
791 
static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  /* In-place Cholesky factorization via ScaLAPACK p*potrf using the lower
     triangular part ("L"); the permutation argument is ignored. */
  PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
  PetscCheckScaLapackInfo("potrf",info);
  A->factortype = MAT_FACTOR_CHOLESKY;
  A->assembled  = PETSC_TRUE;

  /* record which solver package produced the factors */
  ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
808 
MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo * info)809 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
810 {
811   PetscErrorCode ierr;
812 
813   PetscFunctionBegin;
814   ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
815   ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo * info)819 static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
820 {
821   PetscFunctionBegin;
822   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
823   PetscFunctionReturn(0);
824 }
825 
MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType * type)826 PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
827 {
828   PetscFunctionBegin;
829   *type = MATSOLVERSCALAPACK;
830   PetscFunctionReturn(0);
831 }
832 
static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix with the same local row/column sizes as A */
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);
  B->factortype = ftype;
  /* record the solver package on the factor */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
  *F = B;
  PetscFunctionReturn(0);
}
852 
MatSolverTypeRegister_ScaLAPACK(void)853 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
854 {
855   PetscErrorCode ierr;
856 
857   PetscFunctionBegin;
858   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
859   ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscBLASInt   one=1,lwork=0;
  const char     *ntype;
  PetscScalar    *work=NULL,dummy;

  PetscFunctionBegin;
  /* Compute the 1-, Frobenius or infinity norm via ScaLAPACK p*lange.
     The 1- and infinity norms need a workspace array; the Frobenius norm
     does not, so a dummy address is passed in that case. */
  switch (type){
    case NORM_1:
      ntype = "1";
      lwork = PetscMax(a->locr,a->locc);
      break;
    case NORM_FROBENIUS:
      ntype = "F";
      work  = &dummy;
      break;
    case NORM_INFINITY:
      ntype = "I";
      lwork = PetscMax(a->locr,a->locc);
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
  }
  if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
  *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
  if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}
893 
MatZeroEntries_ScaLAPACK(Mat A)894 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
895 {
896   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
897   PetscErrorCode ierr;
898 
899   PetscFunctionBegin;
900   ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
MatGetOwnershipIS_ScaLAPACK(Mat A,IS * rows,IS * cols)904 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
905 {
906   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
907   PetscErrorCode ierr;
908   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;
909 
910   PetscFunctionBegin;
911   if (rows) {
912     n     = a->locr;
913     nb    = a->mb;
914     isrc  = a->rsrc;
915     nproc = a->grid->nprow;
916     iproc = a->grid->myrow;
917     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
918     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
919     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
920   }
921   if (cols) {
922     n     = a->locc;
923     nb    = a->nb;
924     isrc  = a->csrc;
925     nproc = a->grid->npcol;
926     iproc = a->grid->mycol;
927     ierr  = PetscMalloc1(n,&idx);CHKERRQ(ierr);
928     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar    *barray;
  PetscBool      differ=PETSC_FALSE;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Convert a 2d block-cyclic ScaLAPACK matrix to MATDENSE (1d row layout)
     by redistributing the data with p*gemr2d. */
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    /* the auxiliary matrix gets the equal-split layout that matches A's,
       so the gemr2d descriptor below is valid for its local array */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* transfer rows of auxiliary matrix to the final matrix B (which has a
       different row distribution) via MatSetValues */
    ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
    for (i=rstart;i<rend;i++) {
      ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
      ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);

  } else {  /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
      m = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
      n = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
      ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);

    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (reuse == MAT_INPLACE_MATRIX) {
      ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
1025 
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  /* Convert a MATDENSE matrix (1d row layout) to MATSCALAPACK (2d
     block-cyclic layout) by redistributing the data with p*gemr2d. */
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix from the 1d layout into B's 2d block-cyclic layout */
  ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1074 
MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1075 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1076 {
1077   Mat               mat_scal;
1078   PetscErrorCode    ierr;
1079   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1080   const PetscInt    *cols;
1081   const PetscScalar *vals;
1082 
1083   PetscFunctionBegin;
1084   if (reuse == MAT_REUSE_MATRIX) {
1085     mat_scal = *newmat;
1086     ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1087   } else {
1088     ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1089     m = PETSC_DECIDE;
1090     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1091     n = PETSC_DECIDE;
1092     ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1093     ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1094     ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1095     ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1096   }
1097   for (row=rstart;row<rend;row++) {
1098     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1099     ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1100     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1101   }
1102   ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1103   ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1104 
1105   if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1106   else *newmat = mat_scal;
1107   PetscFunctionReturn(0);
1108 }
1109 
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  /* Convert a symmetric SBAIJ matrix (only upper triangle stored) to
     MATSCALAPACK, explicitly generating the lower triangular entries. */
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
  }
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (row=rstart;row<rend;row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
    for (j=0;j<ncols;j++) { /* lower triangular part: mirror each off-diagonal entry */
      if (cols[j] == row) continue;  /* diagonal entries must not be inserted twice */
      v    = A->hermitian ? PetscConj(vals[j]) : vals[j];  /* conjugate for Hermitian matrices */
      ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1152 
static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       sz=0;

  PetscFunctionBegin;
  /* Allocate (zero-initialized) the local array of size lld*locc */
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!a->lld) a->lld = a->locr;  /* default leading dimension is the local row count */

  ierr = PetscFree(a->loc);CHKERRQ(ierr);
  /* compute lld*locc with overflow checking */
  ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
  ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);

  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
1172 
static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode     ierr;
  Mat_ScaLAPACK_Grid *grid;
  PetscBool          flg;
  MPI_Comm           icomm;

  PetscFunctionBegin;
  ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
  ierr = PetscFree(a->loc);CHKERRQ(ierr);
  ierr = PetscFree(a->pivots);CHKERRQ(ierr);
  /* the BLACS grid is shared via an attribute on the inner communicator;
     retrieve it and drop this matrix's reference */
  ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
  if (--grid->grid_refct == 0) {
    /* last user of the grid: release all three BLACS contexts and the keyval */
    Cblacs_gridexit(grid->ictxt);
    Cblacs_gridexit(grid->ictxrow);
    Cblacs_gridexit(grid->ictxcol);
    ierr = PetscFree(grid);CHKERRQ(ierr);
    ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
  }
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  /* remove composed methods before freeing the implementation data */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1202 
PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  PetscErrorCode ierr;
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  /* Verify that the layout is an equal split: all processes own the same
     number of entries, except possibly the last one(s), which may own fewer
     (including zero). Errors out otherwise. */
  ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr);
  if (size>2) {
    ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
    n = ranges[1]-ranges[0];  /* local size of the first process */
    /* find the first process whose local size differs from the first one */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    /* a mismatch is only acceptable if everything from there on is empty */
    if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1220 
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
  ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);

  /* compute local sizes of the 2d block-cyclic distribution via numroc */
  ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);  /* descinit requires lld >= 1 */

  /* allocate local array */
  ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1250 
PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  /* Start communicating stashed off-process entries; they are received and
     inserted in MatAssemblyEnd_ScaLAPACK() */
  if (A->nooffprocentries) PetscFunctionReturn(0);
  ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1263 
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt    n;
  PetscInt       i,flg,*row,*col;
  PetscScalar    *val;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  /* Receive stashed off-process entries and store them in the local array */
  if (A->nooffprocentries) PetscFunctionReturn(0);
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;  /* no more incoming messages */
    for (i=0;i<n;i++) {
      /* map the (1-based) global indices to local indices and owner coordinates */
      ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
      ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
      /* lridx/lcidx are 1-based; the local array is column-major with leading dimension lld */
      switch (A->insertmode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1293 
MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)1294 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1295 {
1296   PetscErrorCode ierr;
1297   Mat            Adense,As;
1298   MPI_Comm       comm;
1299 
1300   PetscFunctionBegin;
1301   ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1302   ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1303   ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1304   ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1305   ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1306   ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1307   ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 /* -------------------------------------------------------------------*/
/* Operations table for MATSCALAPACK. Entries are positional per struct
   _MatOps; a 0 marks an operation this type does not implement. The slot
   index comments (e.g. /\*29*\/) mark every fifth entry. */
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0
};
1462 
/*
   MatStashScatterBegin_ScaLAPACK - begin scattering stashed (row,col,value)
   entries to their owning processes for a MATSCALAPACK matrix.

   Unlike the reference stash scatter, the destination rank of each entry is
   determined by ScaLAPACK's 2D block-cyclic layout (via infog2l/Cblacs_pnum),
   not by PETSc row-ownership ranges; consequently the 'owners' argument is
   not used here.

   Communication protocol: two nonblocking messages per destination process,
   one with the (row,col) index pairs (tag1) and one with the values (tag2).
   The matching receives are posted first; MatStashScatterGetMesg_Ref()
   completes them later using stash->recv_waits.
*/
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    /* BOR of the enum values yields ADD_VALUES|INSERT_VALUES exactly when the modes are mixed */
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  /* number of scalars carried per stashed entry; the MATSCALAPACK stash is
     created with bs=1 (see MatCreate_ScaLAPACK), but the code handles general bs2 */
  bs2 = stash->bs*stash->bs;

  /*  first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i     = j    = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      /* infog2l takes 1-based global indices, hence the +1 */
      ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
      ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
      /* map the global (row,col) to the owning process coordinates (rsrc,csrc) in the 2D grid */
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);  /* grid coordinates -> linear rank */
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, and their from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are size 'bs2' lengths (and remove earlier factor 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* second pass over the stash: pack values and indices into the send buffers,
     advancing startv/starti as running cursors per destination */
  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      /* per destination, the first nlengths[j] slots hold row indices, the next nlengths[j] column indices */
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* startv was consumed as a cursor above; rebuild the prefix sums for the sends */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  /* interleave the index and value requests: (idx0,val0,idx1,val1,...) */
  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  /* hand ownership of all buffers/requests to the stash; freed by the stash machinery */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
1613 
MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)1614 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1615 {
1616   PetscErrorCode ierr;
1617   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1618 
1619   PetscFunctionBegin;
1620   if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1621   if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1622   if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1623   ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1624   ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 /*@
1629    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1630    the ScaLAPACK matrix
1631 
1632    Logically Collective on A
1633 
1634    Input Parameters:
1635 +  A  - a MATSCALAPACK matrix
1636 .  mb - the row block size
1637 -  nb - the column block size
1638 
1639    Level: intermediate
1640 
1641 .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1642 @*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* block sizes must match on all ranks: they define the shared 2D block-cyclic layout */
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* dispatch to the type-specific implementation; silently a no-op for non-ScaLAPACK matrices */
  ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1654 
MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt * mb,PetscInt * nb)1655 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1656 {
1657   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1658 
1659   PetscFunctionBegin;
1660   if (mb) *mb = a->mb;
1661   if (nb) *nb = a->nb;
1662   PetscFunctionReturn(0);
1663 }
1664 
1665 /*@
1666    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1667    the ScaLAPACK matrix
1668 
1669    Not collective
1670 
1671    Input Parameter:
1672 .  A  - a MATSCALAPACK matrix
1673 
1674    Output Parameters:
1675 +  mb - the row block size
1676 -  nb - the column block size
1677 
1678    Level: intermediate
1679 
1680 .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1681 @*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* PetscUseMethod (unlike PetscTryMethod) errors if A is not a MATSCALAPACK matrix */
  ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1691 
1692 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1693 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1694 
1695 /*MC
1696    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1697 
1698    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1699 
1700    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1701 
1702    Options Database Keys:
1703 +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1704 .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1705 -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1706 
1707    Level: beginner
1708 
1709 .seealso: MATDENSE, MATELEMENTAL
1710 M*/
1711 
/*
   MatCreate_ScaLAPACK - constructor for the MATSCALAPACK matrix type.

   Installs the function table and stash callbacks, obtains (or creates) the
   BLACS process grid shared by all MATSCALAPACK matrices on this communicator
   via MPI attribute caching, and processes the block-size options. Actual
   storage is allocated later, in MatSetUp().
*/
PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK      *a;
  PetscErrorCode     ierr;
  PetscBool          flg,flg1;
  Mat_ScaLAPACK_Grid *grid;
  MPI_Comm           icomm;
  PetscBLASInt       nprow,npcol,myrow,mycol;
  PetscInt           optv1,k=2,array[2]={0,0};
  PetscMPIInt        size;

  PetscFunctionBegin;
  ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  A->insertmode = NOT_SET_VALUES;

  /* stash with block size 1; ScatterBegin is overridden so that entry ownership
     follows the 2D block-cyclic layout instead of PETSc row ranges */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
  A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
  A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
  A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
  A->stash.ScatterDestroy = NULL;

  ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
  A->data = (void*)a;

  /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
  if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
    ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr);
  }
  /* use PETSc's cached inner communicator so the attribute survives user comm destruction */
  ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
  if (!flg) {
    /* first MATSCALAPACK matrix on this communicator: create the grid */
    ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);

    ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr);
    /* default to a near-square grid; +0.001 guards against sqrt rounding just below an integer */
    grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
    if (flg1) {
      if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
      grid->nprow = optv1;
    }
    ierr = PetscOptionsEnd();CHKERRQ(ierr);

    if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
    grid->npcol = size/grid->nprow;
    ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
    /* create the main nprow x npcol BLACS context (row-major process ordering) */
    Cblacs_get(-1,0,&grid->ictxt);
    Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
    Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
    grid->grid_refct = 1;
    grid->nprow      = nprow;
    grid->npcol      = npcol;
    grid->myrow      = myrow;
    grid->mycol      = mycol;
    /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
    Cblacs_get(-1,0,&grid->ictxrow);
    Cblacs_gridinit(&grid->ictxrow,"R",1,size);
    Cblacs_get(-1,0,&grid->ictxcol);
    Cblacs_gridinit(&grid->ictxcol,"R",size,1);
    /* cache the grid on the communicator for reuse by later matrices */
    ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr);

  } else grid->grid_refct++;
  ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
  a->grid = grid;
  a->mb   = DEFAULT_BLOCKSIZE;
  a->nb   = DEFAULT_BLOCKSIZE;

  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
  if (flg) {
    /* a single value sets both block sizes */
    a->mb = array[0];
    a->nb = (k>1)? array[1]: a->mb;
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1795 
1796 /*@C
1797    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1798    (2D block cyclic distribution).
1799 
1800    Collective
1801 
1802    Input Parameters:
1803 +  comm - MPI communicator
1804 .  mb   - row block size (or PETSC_DECIDE to have it set)
1805 .  nb   - column block size (or PETSC_DECIDE to have it set)
1806 .  M    - number of global rows
1807 .  N    - number of global columns
1808 .  rsrc - coordinate of process that owns the first row of the distributed matrix
1809 -  csrc - coordinate of process that owns the first column of the distributed matrix
1810 
1811    Output Parameter:
1812 .  A - the matrix
1813 
1814    Options Database Keys:
1815 .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1816 
1817    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1818    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1819    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1820 
1821    Notes:
1822    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1823    is chosen.
1824 
1825    Storage Information:
1826    Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1827    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1828    significance and are thus ignored. The block sizes refer to the values
1829    used for the distributed matrix, not the same meaning as in BAIJ.
1830 
1831    Level: intermediate
1832 
1833 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1834 @*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a;
  PetscInt       m,n;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
  /* global dimensions must be given explicitly; local sizes are derived below */
  if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
  n = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  /* fill in the descriptor ingredients; PETSC_DECIDE block sizes fall back to
     DEFAULT_BLOCKSIZE (mb) and to the row block size (nb) */
  a = (Mat_ScaLAPACK*)(*A)->data;
  ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
  /* MatSetUp() builds the ScaLAPACK descriptor and allocates the local array */
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1861 
1862