1 #include <petsc/private/petscscalapack.h> /*I "petscmat.h" I*/
2
3 #define DEFAULT_BLOCKSIZE 64
4
5 /*
6 The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
7 is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
8 */
9 static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
10
MatView_ScaLAPACK(Mat A,PetscViewer viewer)11 static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
12 {
13 PetscErrorCode ierr;
14 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
15 PetscBool iascii;
16 PetscViewerFormat format;
17 Mat Adense;
18
19 PetscFunctionBegin;
20 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
21 if (iascii) {
22 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
23 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
24 ierr = PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);CHKERRQ(ierr);
25 ierr = PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);CHKERRQ(ierr);
26 ierr = PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);CHKERRQ(ierr);
27 ierr = PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);CHKERRQ(ierr);
28 PetscFunctionReturn(0);
29 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
30 PetscFunctionReturn(0);
31 }
32 }
33 /* convert to dense format and call MatView() */
34 ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);CHKERRQ(ierr);
35 ierr = MatView(Adense,viewer);CHKERRQ(ierr);
36 ierr = MatDestroy(&Adense);CHKERRQ(ierr);
37 PetscFunctionReturn(0);
38 }
39
MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo * info)40 static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
41 {
42 PetscErrorCode ierr;
43 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
44 PetscLogDouble isend[2],irecv[2];
45
46 PetscFunctionBegin;
47 info->block_size = 1.0;
48
49 isend[0] = a->lld*a->locc; /* locally allocated */
50 isend[1] = a->locr*a->locc; /* used submatrix */
51 if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
52 info->nz_allocated = isend[0];
53 info->nz_used = isend[1];
54 } else if (flag == MAT_GLOBAL_MAX) {
55 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
56 info->nz_allocated = irecv[0];
57 info->nz_used = irecv[1];
58 } else if (flag == MAT_GLOBAL_SUM) {
59 ierr = MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
60 info->nz_allocated = irecv[0];
61 info->nz_used = irecv[1];
62 }
63
64 info->nz_unneeded = 0;
65 info->assemblies = A->num_ass;
66 info->mallocs = 0;
67 info->memory = ((PetscObject)A)->mem;
68 info->fill_ratio_given = 0;
69 info->fill_ratio_needed = 0;
70 info->factor_mallocs = 0;
71 PetscFunctionReturn(0);
72 }
73
MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)74 PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
75 {
76 PetscFunctionBegin;
77 switch (op) {
78 case MAT_NEW_NONZERO_LOCATIONS:
79 case MAT_NEW_NONZERO_LOCATION_ERR:
80 case MAT_NEW_NONZERO_ALLOCATION_ERR:
81 case MAT_SYMMETRIC:
82 case MAT_SORTED_FULL:
83 case MAT_HERMITIAN:
84 break;
85 default:
86 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
87 }
88 PetscFunctionReturn(0);
89 }
90
/*
   MatSetValues_ScaLAPACK - Inserts or adds a logically dense block of values.

   For each (row,col) pair the global index is mapped with p?infog2l to local
   indices plus the coordinates of the owning process.  Locally owned entries
   are written straight into the column-major local array; off-process entries
   are queued in the matrix stash for MatAssemblyBegin/End to deliver.
*/
static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  for (i=0;i<nr;i++) {
    if (rows[i] < 0) continue;                            /* negative index: skip this row */
    ierr = PetscBLASIntCast(rows[i]+1,&gridx);CHKERRQ(ierr); /* ScaLAPACK indices are 1-based */
    for (j=0;j<nc;j++) {
      if (cols[j] < 0) continue;                          /* negative index: skip this column */
      ierr = PetscBLASIntCast(cols[j]+1,&gcidx);CHKERRQ(ierr);
      /* translate global (gridx,gcidx) to local (lridx,lcidx) and owner (rsrc,csrc) */
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
        /* locally owned: write into the column-major local array (leading dimension lld) */
        switch (imode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
        default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
        }
      } else {
        /* off-process entry: stash it; assembly will route it to the owner */
        if (A->nooffprocentries) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
        A->assembled = PETSC_FALSE;
        ierr = MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
121
/*
   MatMultXXXYYY_ScaLAPACK - Shared kernel behind MatMult/MatMultTranspose
   and their Add variants: computes y = op(A)*x + beta*y, op = A or A^T.

   x and y arrive in PETSc's 1d row-block layout.  They are described with 1d
   ScaLAPACK descriptors, redistributed with p?gemr2d onto A's 2d process grid
   (as a column or a row of a 2d matrix, depending on the operation), the PBLAS
   gemv is applied, and y is redistributed back.  When beta==0 the initial
   contents of y are never read, so its redistribution is skipped.

   NOTE(review): the 1d block size is taken from ranges[1], the end of the
   first ownership range -- this presumes PETSc's standard layout where the
   first range is the largest; confirm for custom layouts.
*/
static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x2d,*y2d,alpha=1.0;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

  PetscFunctionBegin;
  if (transpose) {

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
    xlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* y block size */
    ylld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors: local sizes follow A's block-cyclic distribution */
    lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    xlld = PetscMax(1,lszx);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

    /* redistribute y as a row of a 2d matrix (only needed when beta!=0, i.e. y is read) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

  } else { /* non-transpose */

    /* create ScaLAPACK descriptors for vectors (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* x block size */
    xlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* y block size */
    ylld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vectors: local sizes follow A's block-cyclic distribution */
    lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscMalloc2(lszx,&x2d,lszy,&y2d);CHKERRQ(ierr);
    ylld = PetscMax(1,lszy);

    /* create ScaLAPACK descriptors for vectors (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
    PetscCheckScaLapackInfo("descinit",info);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute x as a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

    /* redistribute y as a column of a 2d matrix (only needed when beta!=0, i.e. y is read) */
    if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

    /* call PBLAS subroutine */
    PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

    /* redistribute y from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

  }
  ierr = PetscFree2(x2d,y2d);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
211
MatMult_ScaLAPACK(Mat A,Vec x,Vec y)212 static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
213 {
214 PetscErrorCode ierr;
215 const PetscScalar *xarray;
216 PetscScalar *yarray;
217
218 PetscFunctionBegin;
219 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
220 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
221 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);CHKERRQ(ierr);
222 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
223 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
224 PetscFunctionReturn(0);
225 }
226
MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)227 static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
228 {
229 PetscErrorCode ierr;
230 const PetscScalar *xarray;
231 PetscScalar *yarray;
232
233 PetscFunctionBegin;
234 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
235 ierr = VecGetArray(y,&yarray);CHKERRQ(ierr);
236 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);CHKERRQ(ierr);
237 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
238 ierr = VecRestoreArray(y,&yarray);CHKERRQ(ierr);
239 PetscFunctionReturn(0);
240 }
241
MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)242 static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
243 {
244 PetscErrorCode ierr;
245 const PetscScalar *xarray;
246 PetscScalar *zarray;
247
248 PetscFunctionBegin;
249 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
250 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
251 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
252 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);CHKERRQ(ierr);
253 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
254 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
255 PetscFunctionReturn(0);
256 }
257
MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)258 static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
259 {
260 PetscErrorCode ierr;
261 const PetscScalar *xarray;
262 PetscScalar *zarray;
263
264 PetscFunctionBegin;
265 if (y != z) { ierr = VecCopy(y,z);CHKERRQ(ierr); }
266 ierr = VecGetArrayRead(x,&xarray);CHKERRQ(ierr);
267 ierr = VecGetArray(z,&zarray);CHKERRQ(ierr);
268 ierr = MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);CHKERRQ(ierr);
269 ierr = VecRestoreArrayRead(x,&xarray);CHKERRQ(ierr);
270 ierr = VecRestoreArray(z,&zarray);CHKERRQ(ierr);
271 PetscFunctionReturn(0);
272 }
273
/*
   MatMatMultNumeric_ScaLAPACK - Numeric phase of C = A*B: a single PBLAS gemm
   ("N","N") over the 2d block-cyclic data, overwriting C (beta = 0).
*/
PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
287
MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)288 PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
289 {
290 PetscErrorCode ierr;
291
292 PetscFunctionBegin;
293 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
294 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
295 ierr = MatSetUp(C);CHKERRQ(ierr);
296 C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
297 PetscFunctionReturn(0);
298 }
299
/*
   MatMatTransposeMultNumeric_ScaLAPACK - Numeric phase of C = A*B^T: a single
   PBLAS gemm ("N","T") over the 2d block-cyclic data, overwriting C (beta = 0).
*/
static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
{
  Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
  Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
  Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
  PetscScalar   sone=1.0,zero=0.0;
  PetscBLASInt  one=1;

  PetscFunctionBegin;
  PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
  C->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
313
MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)314 static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
315 {
316 PetscErrorCode ierr;
317
318 PetscFunctionBegin;
319 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
320 ierr = MatSetType(C,MATSCALAPACK);CHKERRQ(ierr);
321 ierr = MatSetUp(C);CHKERRQ(ierr);
322 PetscFunctionReturn(0);
323 }
324
325 /* --------------------------------------- */
MatProductSetFromOptions_ScaLAPACK_AB(Mat C)326 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
327 {
328 PetscFunctionBegin;
329 C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
330 C->ops->productsymbolic = MatProductSymbolic_AB;
331 PetscFunctionReturn(0);
332 }
333
MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)334 static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
335 {
336 PetscFunctionBegin;
337 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
338 C->ops->productsymbolic = MatProductSymbolic_ABt;
339 PetscFunctionReturn(0);
340 }
341
MatProductSetFromOptions_ScaLAPACK(Mat C)342 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
343 {
344 PetscErrorCode ierr;
345 Mat_Product *product = C->product;
346
347 PetscFunctionBegin;
348 switch (product->type) {
349 case MATPRODUCT_AB:
350 ierr = MatProductSetFromOptions_ScaLAPACK_AB(C);CHKERRQ(ierr);
351 break;
352 case MATPRODUCT_ABt:
353 ierr = MatProductSetFromOptions_ScaLAPACK_ABt(C);CHKERRQ(ierr);
354 break;
355 default: SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
356 }
357 PetscFunctionReturn(0);
358 }
359 /* --------------------------------------- */
360
/*
   MatGetDiagonal_ScaLAPACK - Extracts the diagonal of A into vector D.

   Each diagonal entry is read with p?elget and stored with p?elset into an
   auxiliary 2d-distributed vector, which is then redistributed (p?gemr2d) to
   D's 1d layout.  The shorter global dimension selects the variant: if rows
   <= columns the auxiliary vector is a column of a 2d matrix ("row version"),
   otherwise a row ("column version").
*/
static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *darray,*d2d,v;
  const PetscInt *ranges;
  PetscBLASInt   j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  ierr = VecGetArray(D,&darray);CHKERRQ(ierr);

  if (A->rmap->N<=A->cmap->N) { /* row version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: elset only writes locally owned entries) */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: read A(j,j), broadcast along the process row, store in d2d(j) */
    for (j=1;j<=a->M;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
    }

    /* redistribute d from a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
    ierr = PetscFree(d2d);CHKERRQ(ierr);

  } else { /* column version */

    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector (zeroed: elset only writes locally owned entries) */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* collect diagonal: read A(j,j), broadcast along the process column, store in d2d(j) */
    for (j=1;j<=a->N;j++) {
      PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
      PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
    }

    /* redistribute d from a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
    ierr = PetscFree(d2d);CHKERRQ(ierr);
  }

  ierr = VecRestoreArray(D,&darray);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(D);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(D);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
433
/*
   MatDiagonalScale_ScaLAPACK - Computes A = diag(L)*A*diag(R).

   Each scaling vector is redistributed from PETSc's 1d layout onto the process
   grid (R becomes a row of a 2d matrix, L a column), broadcast along the
   orthogonal grid dimension with BLACS gebs2d/gebr2d so every process holds
   the factors for its local columns/rows, and then applied entrywise to the
   local block.  Either L or R may be NULL to skip that side.

   NOTE(review): Cdgebs2d/Cdgebr2d are the double-precision BLACS broadcasts;
   confirm they handle PetscScalar correctly in complex builds.
*/
static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  const PetscScalar *d;
  const PetscInt    *ranges;
  PetscScalar       *d2d;
  PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

  PetscFunctionBegin;
  if (R) {
    ierr = VecGetArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->cmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&nb);CHKERRQ(ierr);  /* D block size */
    dlld = 1;
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a row of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

    /* broadcast along process columns: first grid row sends, the rest receive */
    if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

    /* local scaling: column j of the local block scaled by d2d[j] */
    for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(R,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  if (L) {
    ierr = VecGetArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
    /* create ScaLAPACK descriptor for vector (1d block distribution) */
    ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
    ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* D block size */
    dlld = PetscMax(1,A->rmap->n);
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* allocate 2d vector */
    lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
    ierr = PetscCalloc1(lszd,&d2d);CHKERRQ(ierr);
    dlld = PetscMax(1,lszd);

    /* create ScaLAPACK descriptor for vector (2d block distribution) */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute d to a column of a 2d matrix */
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

    /* broadcast along process rows: first grid column sends, the rest receive */
    if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
    else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

    /* local scaling: row i of the local block scaled by d2d[i] */
    for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

    ierr = PetscFree(d2d);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(L,(const PetscScalar **)&d);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
507
MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool * missing,PetscInt * d)508 static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
509 {
510 PetscFunctionBegin;
511 *missing = PETSC_FALSE;
512 PetscFunctionReturn(0);
513 }
514
MatScale_ScaLAPACK(Mat X,PetscScalar a)515 static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
516 {
517 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
518 PetscBLASInt n,one=1;
519
520 PetscFunctionBegin;
521 n = x->lld*x->locc;
522 PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
523 PetscFunctionReturn(0);
524 }
525
MatShift_ScaLAPACK(Mat X,PetscScalar alpha)526 static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
527 {
528 Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
529 PetscBLASInt i,n;
530 PetscScalar v;
531
532 PetscFunctionBegin;
533 n = PetscMin(x->M,x->N);
534 for (i=1;i<=n;i++) {
535 PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
536 v += alpha;
537 PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
538 }
539 PetscFunctionReturn(0);
540 }
541
/*
   MatAXPY_ScaLAPACK - Computes Y = alpha*X + Y using the ScaLAPACK matrix-add
   routine.  X and Y must share the same block-cyclic distribution (checked
   below); str is ignored since both matrices are dense.
*/
static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
  Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
  PetscBLASInt   one=1;
  PetscScalar    beta=1.0; /* keep existing Y contents: Y <- alpha*X + 1.0*Y */

  PetscFunctionBegin;
  MatScaLAPACKCheckDistribution(Y,1,X,3);
  PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
  ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
556
MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)557 static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
558 {
559 PetscErrorCode ierr;
560 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
561 Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
562
563 PetscFunctionBegin;
564 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
565 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
566 PetscFunctionReturn(0);
567 }
568
MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat * B)569 static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
570 {
571 Mat Bs;
572 MPI_Comm comm;
573 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;
574 PetscErrorCode ierr;
575
576 PetscFunctionBegin;
577 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
578 ierr = MatCreate(comm,&Bs);CHKERRQ(ierr);
579 ierr = MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
580 ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
581 b = (Mat_ScaLAPACK*)Bs->data;
582 b->M = a->M;
583 b->N = a->N;
584 b->mb = a->mb;
585 b->nb = a->nb;
586 b->rsrc = a->rsrc;
587 b->csrc = a->csrc;
588 ierr = MatSetUp(Bs);CHKERRQ(ierr);
589 *B = Bs;
590 if (op == MAT_COPY_VALUES) {
591 ierr = PetscArraycpy(b->loc,a->loc,a->lld*a->locc);CHKERRQ(ierr);
592 }
593 Bs->assembled = PETSC_TRUE;
594 PetscFunctionReturn(0);
595 }
596
/*
   MatTranspose_ScaLAPACK - Computes B = A^T.  Only MAT_INITIAL_MATRIX is
   supported: B is always created fresh with transposed dimensions.

   In complex builds the PBLAStran_ call apparently conjugates as well (hence
   the loop below labelled "undo conjugation"), so an extra entrywise pass
   restores a plain, non-conjugating transpose.
*/
static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
#endif

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create B with swapped local and global dimensions */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
    ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
#if defined(PETSC_USE_COMPLEX)
  /* undo conjugation */
  for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
#endif
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
625
MatConjugate_ScaLAPACK(Mat A)626 static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
627 {
628 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
629 PetscInt i,j;
630
631 PetscFunctionBegin;
632 for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
633 PetscFunctionReturn(0);
634 }
635
/*
   MatHermitianTranspose_ScaLAPACK - Computes B = A^H (conjugate transpose).
   Only MAT_INITIAL_MATRIX is supported.

   Unlike MatTranspose_ScaLAPACK(), no post-processing follows the PBLAStran_
   call: the conjugation it performs in complex builds is exactly what is
   wanted here (in real builds this is just the transpose).
*/
static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
  Mat            Bs = *B;
  PetscBLASInt   one=1;
  PetscScalar    sone=1.0,zero=0.0;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create B with swapped local and global dimensions */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&Bs);CHKERRQ(ierr);
    ierr = MatSetSizes(Bs,A->cmap->n,A->rmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetType(Bs,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bs);CHKERRQ(ierr);
    *B = Bs;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
  b = (Mat_ScaLAPACK*)Bs->data;
  PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
  Bs->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
657
/*
   MatSolve_ScaLAPACK - Solves A x = b using the LU or Cholesky factors
   previously stored in A (A->factortype selects which).

   The right-hand side is first copied into X, redistributed with p?gemr2d
   from PETSc's 1d layout into a 2d block-cyclic column vector, passed to
   p?getrs (LU) or p?potrs (Cholesky), and redistributed back into X.
*/
static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscScalar    *x,*x2d;
  const PetscInt *ranges;
  PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

  PetscFunctionBegin;
  ierr = VecCopy(B,X);CHKERRQ(ierr); /* solve in place in X, keeping B intact */
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);

  /* create ScaLAPACK descriptor for a vector (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&mb);CHKERRQ(ierr);  /* x block size */
  xlld = PetscMax(1,A->rmap->n);
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* allocate 2d vector */
  lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  ierr = PetscMalloc1(lszx,&x2d);CHKERRQ(ierr);
  xlld = PetscMax(1,lszx);

  /* create ScaLAPACK descriptor for a vector (2d block distribution) */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute x as a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

  /* call ScaLAPACK subroutine appropriate for the stored factorization */
  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
    PetscCheckScaLapackInfo("getrs",info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
    PetscCheckScaLapackInfo("potrs",info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
    break;
  }

  /* redistribute x from a column of a 2d matrix */
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

  ierr = PetscFree(x2d);CHKERRQ(ierr);
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
711
MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)712 static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
713 {
714 PetscErrorCode ierr;
715
716 PetscFunctionBegin;
717 ierr = MatSolve_ScaLAPACK(A,B,X);CHKERRQ(ierr);
718 ierr = VecAXPY(X,1,Y);CHKERRQ(ierr);
719 PetscFunctionReturn(0);
720 }
721
/*
   MatMatSolve_ScaLAPACK - Solves A X = B with multiple right-hand sides.

   B and X must both be MATSCALAPACK with the same distribution; the RHS local
   array is copied into X and then solved in place with p?getrs or p?potrs,
   depending on the factorization stored in A.
*/
static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b,*x;
  PetscBool      flg1,flg2;
  PetscBLASInt   one=1,info;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);CHKERRQ(ierr);
  if (!(flg1 && flg2)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
  MatScaLAPACKCheckDistribution(B,1,X,2);
  b = (Mat_ScaLAPACK*)B->data;
  x = (Mat_ScaLAPACK*)X->data;
  /* same distribution, so the raw local arrays can be copied; solve overwrites X */
  ierr = PetscArraycpy(x->loc,b->loc,b->lld*b->locc);CHKERRQ(ierr);

  switch (A->factortype) {
  case MAT_FACTOR_LU:
    PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
    PetscCheckScaLapackInfo("getrs",info);
    break;
  case MAT_FACTOR_CHOLESKY:
    PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
    PetscCheckScaLapackInfo("potrs",info);
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
    break;
  }
  PetscFunctionReturn(0);
}
753
MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo * factorinfo)754 static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
755 {
756 PetscErrorCode ierr;
757 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
758 PetscBLASInt one=1,info;
759
760 PetscFunctionBegin;
761 if (!a->pivots) {
762 ierr = PetscMalloc1(a->locr+a->mb,&a->pivots);CHKERRQ(ierr);
763 ierr = PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));CHKERRQ(ierr);
764 }
765 PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
766 PetscCheckScaLapackInfo("getrf",info);
767 A->factortype = MAT_FACTOR_LU;
768 A->assembled = PETSC_TRUE;
769
770 ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
771 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
772 PetscFunctionReturn(0);
773 }
774
MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo * info)775 static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
776 {
777 PetscErrorCode ierr;
778
779 PetscFunctionBegin;
780 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
781 ierr = MatLUFactor_ScaLAPACK(F,0,0,info);CHKERRQ(ierr);
782 PetscFunctionReturn(0);
783 }
784
/* Symbolic LU factorization: intentionally a no-op for dense ScaLAPACK matrices. */
static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
791
MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo * factorinfo)792 static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
793 {
794 PetscErrorCode ierr;
795 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
796 PetscBLASInt one=1,info;
797
798 PetscFunctionBegin;
799 PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
800 PetscCheckScaLapackInfo("potrf",info);
801 A->factortype = MAT_FACTOR_CHOLESKY;
802 A->assembled = PETSC_TRUE;
803
804 ierr = PetscFree(A->solvertype);CHKERRQ(ierr);
805 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);CHKERRQ(ierr);
806 PetscFunctionReturn(0);
807 }
808
MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo * info)809 static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
810 {
811 PetscErrorCode ierr;
812
813 PetscFunctionBegin;
814 ierr = MatCopy(A,F,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
815 ierr = MatCholeskyFactor_ScaLAPACK(F,0,info);CHKERRQ(ierr);
816 PetscFunctionReturn(0);
817 }
818
/* Symbolic Cholesky factorization: intentionally a no-op for dense ScaLAPACK matrices. */
static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
{
  PetscFunctionBegin;
  /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
  PetscFunctionReturn(0);
}
825
/* Report the solver package that produced this factored matrix (always MATSOLVERSCALAPACK). */
PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERSCALAPACK;
  PetscFunctionReturn(0);
}
832
MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat * F)833 static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
834 {
835 Mat B;
836 PetscErrorCode ierr;
837
838 PetscFunctionBegin;
839 /* Create the factorization matrix */
840 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
841 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
842 ierr = MatSetType(B,MATSCALAPACK);CHKERRQ(ierr);
843 ierr = MatSetUp(B);CHKERRQ(ierr);
844 B->factortype = ftype;
845 ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
846 ierr = PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);CHKERRQ(ierr);
847
848 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);CHKERRQ(ierr);
849 *F = B;
850 PetscFunctionReturn(0);
851 }
852
MatSolverTypeRegister_ScaLAPACK(void)853 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
854 {
855 PetscErrorCode ierr;
856
857 PetscFunctionBegin;
858 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
859 ierr = MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);CHKERRQ(ierr);
860 PetscFunctionReturn(0);
861 }
862
MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal * nrm)863 static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
864 {
865 PetscErrorCode ierr;
866 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
867 PetscBLASInt one=1,lwork=0;
868 const char *ntype;
869 PetscScalar *work=NULL,dummy;
870
871 PetscFunctionBegin;
872 switch (type){
873 case NORM_1:
874 ntype = "1";
875 lwork = PetscMax(a->locr,a->locc);
876 break;
877 case NORM_FROBENIUS:
878 ntype = "F";
879 work = &dummy;
880 break;
881 case NORM_INFINITY:
882 ntype = "I";
883 lwork = PetscMax(a->locr,a->locc);
884 break;
885 default:
886 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
887 }
888 if (lwork) { ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); }
889 *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
890 if (lwork) { ierr = PetscFree(work);CHKERRQ(ierr); }
891 PetscFunctionReturn(0);
892 }
893
MatZeroEntries_ScaLAPACK(Mat A)894 static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
895 {
896 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
897 PetscErrorCode ierr;
898
899 PetscFunctionBegin;
900 ierr = PetscArrayzero(a->loc,a->lld*a->locc);CHKERRQ(ierr);
901 PetscFunctionReturn(0);
902 }
903
MatGetOwnershipIS_ScaLAPACK(Mat A,IS * rows,IS * cols)904 static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
905 {
906 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
907 PetscErrorCode ierr;
908 PetscInt i,n,nb,isrc,nproc,iproc,*idx;
909
910 PetscFunctionBegin;
911 if (rows) {
912 n = a->locr;
913 nb = a->mb;
914 isrc = a->rsrc;
915 nproc = a->grid->nprow;
916 iproc = a->grid->myrow;
917 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr);
918 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
919 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);CHKERRQ(ierr);
920 }
921 if (cols) {
922 n = a->locc;
923 nb = a->nb;
924 isrc = a->csrc;
925 nproc = a->grid->npcol;
926 iproc = a->grid->mycol;
927 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr);
928 for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
929 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);CHKERRQ(ierr);
930 }
931 PetscFunctionReturn(0);
932 }
933
/*
   Convert a MATSCALAPACK matrix (2d block-cyclic distribution) to MATDENSE
   (1d block-row distribution), redistributing with p*gemr2d.

   If reuse==MAT_REUSE_MATRIX and *B has row ranges different from the
   equal-split layout this conversion produces, the data is first placed in
   an auxiliary dense matrix and then copied row by row into *B.
*/
static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode    ierr;
  Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
  Mat               Bmpi;
  MPI_Comm          comm;
  PetscInt          i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
  const PetscInt    *ranges,*branges,*cwork;
  const PetscScalar *vwork;
  PetscBLASInt      bdesc[9],bmb,zero=0,one=1,lld,info;
  PetscScalar       *barray;
  PetscBool         differ=PETSC_FALSE;
  PetscMPIInt       size;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
    ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
    ierr = PetscLayoutGetRanges((*B)->rmap,&branges);CHKERRQ(ierr);
    for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
  }

  if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    /* transfer rows of auxiliary matrix to the final matrix B */
    ierr = MatGetOwnershipRange(Bmpi,&rstart,&rend);CHKERRQ(ierr);
    for (i=rstart;i<rend;i++) {
      ierr = MatGetRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
      ierr = MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDestroy(&Bmpi);CHKERRQ(ierr);

  } else { /* normal cases */

    if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
    else {
      ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
      m = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
      n = PETSC_DECIDE;
      ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
      ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(Bmpi,MATDENSE);CHKERRQ(ierr);
      ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
    }

    /* create ScaLAPACK descriptor for B (1d block distribution) */
    ierr = PetscBLASIntCast(ranges[1],&bmb);CHKERRQ(ierr);  /* row block size */
    lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
    PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
    PetscCheckScaLapackInfo("descinit",info);

    /* redistribute matrix */
    ierr = MatDenseGetArray(Bmpi,&barray);CHKERRQ(ierr);
    PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
    ierr = MatDenseRestoreArray(Bmpi,&barray);CHKERRQ(ierr);

    ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (reuse == MAT_INPLACE_MATRIX) {
      ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
    } else *B = Bmpi;
  }
  PetscFunctionReturn(0);
}
1025
/*
   Convert a MATDENSE matrix (1d block-row distribution) to MATSCALAPACK
   (2d block-cyclic distribution), redistributing with p*gemr2d.
*/
PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *b;
  Mat            Bmpi;
  MPI_Comm       comm;
  PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
  const PetscInt *ranges;
  PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
  PetscScalar    *aarray;
  PetscInt       lda;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
  else {
    /* new ScaLAPACK matrix with equal-split local sizes */
    ierr = MatCreate(comm,&Bmpi);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(Bmpi,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(Bmpi,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(Bmpi);CHKERRQ(ierr);
  }
  b = (Mat_ScaLAPACK*)Bmpi->data;

  /* create ScaLAPACK descriptor for A (1d block distribution) */
  ierr = PetscLayoutGetRanges(A->rmap,&ranges);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ranges[1],&amb);CHKERRQ(ierr);  /* row block size */
  ierr = MatDenseGetLDA(A,&lda);CHKERRQ(ierr);
  lld = PetscMax(lda,1);  /* local leading dimension */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
  PetscCheckScaLapackInfo("descinit",info);

  /* redistribute matrix */
  ierr = MatDenseGetArray(A,&aarray);CHKERRQ(ierr);
  PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
  ierr = MatDenseRestoreArray(A,&aarray);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&Bmpi);CHKERRQ(ierr);
  } else *B = Bmpi;
  PetscFunctionReturn(0);
}
1074
MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat * newmat)1075 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1076 {
1077 Mat mat_scal;
1078 PetscErrorCode ierr;
1079 PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1080 const PetscInt *cols;
1081 const PetscScalar *vals;
1082
1083 PetscFunctionBegin;
1084 if (reuse == MAT_REUSE_MATRIX) {
1085 mat_scal = *newmat;
1086 ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
1087 } else {
1088 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
1089 m = PETSC_DECIDE;
1090 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
1091 n = PETSC_DECIDE;
1092 ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
1093 ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
1094 ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
1095 ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
1096 }
1097 for (row=rstart;row<rend;row++) {
1098 ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1099 ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1100 ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
1101 }
1102 ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1103 ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1104
1105 if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
1106 else *newmat = mat_scal;
1107 PetscFunctionReturn(0);
1108 }
1109
/*
   Convert a (symmetric storage) SBAIJ matrix to MATSCALAPACK, explicitly
   expanding the symmetry: each stored upper-triangular entry is mirrored
   into the lower triangle (conjugated if A is Hermitian).
*/
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat               mat_scal;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
  const PetscInt    *cols;
  const PetscScalar *vals;
  PetscScalar       v;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_scal = *newmat;
    ierr = MatZeroEntries(mat_scal);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);CHKERRQ(ierr);
    m = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);CHKERRQ(ierr);
    n = PETSC_DECIDE;
    ierr = PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_scal,m,n,M,N);CHKERRQ(ierr);
    ierr = MatSetType(mat_scal,MATSCALAPACK);CHKERRQ(ierr);
    ierr = MatSetUp(mat_scal);CHKERRQ(ierr);
  }
  /* makes MatGetRow() return the stored (upper-triangular) part of each row */
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (row=rstart;row<rend;row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ADD_VALUES is used so the mirrored entries can be contributed separately;
       the matrix was zeroed/created empty above, so no double counting occurs */
    ierr = MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
    for (j=0;j<ncols;j++) { /* lower triangular part */
      if (cols[j] == row) continue;  /* diagonal entries are not mirrored */
      v = A->hermitian ? PetscConj(vals[j]) : vals[j];
      ierr = MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);CHKERRQ(ierr);
    }
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) { ierr = MatHeaderReplace(A,&mat_scal);CHKERRQ(ierr); }
  else *newmat = mat_scal;
  PetscFunctionReturn(0);
}
1152
MatScaLAPACKSetPreallocation(Mat A)1153 static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1154 {
1155 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1156 PetscErrorCode ierr;
1157 PetscInt sz=0;
1158
1159 PetscFunctionBegin;
1160 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
1161 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
1162 if (!a->lld) a->lld = a->locr;
1163
1164 ierr = PetscFree(a->loc);CHKERRQ(ierr);
1165 ierr = PetscIntMultError(a->lld,a->locc,&sz);CHKERRQ(ierr);
1166 ierr = PetscCalloc1(sz,&a->loc);CHKERRQ(ierr);
1167 ierr = PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));CHKERRQ(ierr);
1168
1169 A->preallocated = PETSC_TRUE;
1170 PetscFunctionReturn(0);
1171 }
1172
MatDestroy_ScaLAPACK(Mat A)1173 static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1174 {
1175 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1176 PetscErrorCode ierr;
1177 Mat_ScaLAPACK_Grid *grid;
1178 PetscBool flg;
1179 MPI_Comm icomm;
1180
1181 PetscFunctionBegin;
1182 ierr = MatStashDestroy_Private(&A->stash);CHKERRQ(ierr);
1183 ierr = PetscFree(a->loc);CHKERRQ(ierr);
1184 ierr = PetscFree(a->pivots);CHKERRQ(ierr);
1185 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1186 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1187 if (--grid->grid_refct == 0) {
1188 Cblacs_gridexit(grid->ictxt);
1189 Cblacs_gridexit(grid->ictxrow);
1190 Cblacs_gridexit(grid->ictxcol);
1191 ierr = PetscFree(grid);CHKERRQ(ierr);
1192 ierr = MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);CHKERRQ(ierr);
1193 }
1194 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1195 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);CHKERRQ(ierr);
1196 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1197 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);CHKERRQ(ierr);
1198 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);CHKERRQ(ierr);
1199 ierr = PetscFree(A->data);CHKERRQ(ierr);
1200 PetscFunctionReturn(0);
1201 }
1202
/*
   Verify a layout is usable by MATSCALAPACK: every process must own the same
   number of entries, except possibly at the tail of the communicator
   (the last nonempty process may own fewer, trailing processes may own none).
*/
PETSC_STATIC_INLINE PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
{
  PetscErrorCode ierr;
  const PetscInt *ranges;
  PetscMPIInt    size;
  PetscInt       i,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(map->comm,&size);CHKERRQ(ierr);
  if (size>2) {  /* with <=2 processes any split is acceptable */
    ierr = PetscLayoutGetRanges(map,&ranges);CHKERRQ(ierr);
    n = ranges[1]-ranges[0];  /* local size of the first process is the reference */
    /* find the first process whose local size deviates from the reference */
    for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
    /* a deviation is only tolerated if this process or the next one is empty
       (i.e. the deviation occurs at the tail of the distribution) */
    if (i<size-1 && ranges[i+1]-ranges[i]!=0 && ranges[i+2]-ranges[i+1]!=0) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
  }
  PetscFunctionReturn(0);
}
1220
/*
   Set up a MATSCALAPACK matrix: validate the layouts, compute the local
   sizes of the 2d block-cyclic distribution with numroc, allocate the local
   array, and initialize the ScaLAPACK array descriptor.
*/
PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
{
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscErrorCode ierr;
  PetscBLASInt   info=0;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);

  /* check that the layout is as enforced by MatCreateScaLAPACK */
  ierr = MatScaLAPACKCheckLayout(A->rmap);CHKERRQ(ierr);
  ierr = MatScaLAPACKCheckLayout(A->cmap);CHKERRQ(ierr);

  /* compute local sizes */
  ierr = PetscBLASIntCast(A->rmap->N,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(A->cmap->N,&a->N);CHKERRQ(ierr);
  /* numroc gives the number of rows/columns of the global matrix owned locally */
  a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
  a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
  a->lld  = PetscMax(1,a->locr);  /* descinit requires lld >= 1 */

  /* allocate local array */
  ierr = MatScaLAPACKSetPreallocation(A);CHKERRQ(ierr);

  /* set up ScaLAPACK descriptor */
  PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
  PetscCheckScaLapackInfo("descinit",info);
  PetscFunctionReturn(0);
}
1250
MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)1251 PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1252 {
1253 PetscErrorCode ierr;
1254 PetscInt nstash,reallocs;
1255
1256 PetscFunctionBegin;
1257 if (A->nooffprocentries) PetscFunctionReturn(0);
1258 ierr = MatStashScatterBegin_Private(A,&A->stash,NULL);CHKERRQ(ierr);
1259 ierr = MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);CHKERRQ(ierr);
1260 ierr = PetscInfo2(A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
1261 PetscFunctionReturn(0);
1262 }
1263
/*
   End assembly: receive stashed entries from other processes and store each
   one into the local block-cyclic array, using infog2l to map global (row,col)
   to local indices.
*/
PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
  PetscMPIInt    n;
  PetscInt       i,flg,*row,*col;
  PetscScalar    *val;
  PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

  PetscFunctionBegin;
  if (A->nooffprocentries) PetscFunctionReturn(0);
  /* drain incoming stash messages until none remain */
  while (1) {
    ierr = MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
    if (!flg) break;
    for (i=0;i<n;i++) {
      /* infog2l works with 1-based global indices */
      ierr = PetscBLASIntCast(row[i]+1,&gridx);CHKERRQ(ierr);
      ierr = PetscBLASIntCast(col[i]+1,&gcidx);CHKERRQ(ierr);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      /* sanity check: the sender should only have routed entries we own */
      if (rsrc!=a->grid->myrow || csrc!=a->grid->mycol) SETERRQ(PetscObjectComm((PetscObject)A),1,"Something went wrong, received value does not belong to this process");
      /* store in column-major local array (convert back to 0-based indices) */
      switch (A->insertmode) {
        case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
        case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
        default: SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
      }
    }
  }
  ierr = MatStashScatterEnd_Private(&A->stash);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1293
MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)1294 PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1295 {
1296 PetscErrorCode ierr;
1297 Mat Adense,As;
1298 MPI_Comm comm;
1299
1300 PetscFunctionBegin;
1301 ierr = PetscObjectGetComm((PetscObject)newMat,&comm);CHKERRQ(ierr);
1302 ierr = MatCreate(comm,&Adense);CHKERRQ(ierr);
1303 ierr = MatSetType(Adense,MATDENSE);CHKERRQ(ierr);
1304 ierr = MatLoad(Adense,viewer);CHKERRQ(ierr);
1305 ierr = MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);CHKERRQ(ierr);
1306 ierr = MatDestroy(&Adense);CHKERRQ(ierr);
1307 ierr = MatHeaderReplace(newMat,&As);CHKERRQ(ierr);
1308 PetscFunctionReturn(0);
1309 }
1310
1311 /* -------------------------------------------------------------------*/
/* Function table for MATSCALAPACK; slot positions follow the _MatOps struct
   layout (the numeric comments give the index of the adjacent entry).
   Unimplemented operations are 0. */
static struct _MatOps MatOps_Values = {
       MatSetValues_ScaLAPACK,
       0,
       0,
       MatMult_ScaLAPACK,
/* 4*/ MatMultAdd_ScaLAPACK,
       MatMultTranspose_ScaLAPACK,
       MatMultTransposeAdd_ScaLAPACK,
       MatSolve_ScaLAPACK,
       MatSolveAdd_ScaLAPACK,
       0,
/*10*/ 0,
       MatLUFactor_ScaLAPACK,
       MatCholeskyFactor_ScaLAPACK,
       0,
       MatTranspose_ScaLAPACK,
/*15*/ MatGetInfo_ScaLAPACK,
       0,
       MatGetDiagonal_ScaLAPACK,
       MatDiagonalScale_ScaLAPACK,
       MatNorm_ScaLAPACK,
/*20*/ MatAssemblyBegin_ScaLAPACK,
       MatAssemblyEnd_ScaLAPACK,
       MatSetOption_ScaLAPACK,
       MatZeroEntries_ScaLAPACK,
/*24*/ 0,
       MatLUFactorSymbolic_ScaLAPACK,
       MatLUFactorNumeric_ScaLAPACK,
       MatCholeskyFactorSymbolic_ScaLAPACK,
       MatCholeskyFactorNumeric_ScaLAPACK,
/*29*/ MatSetUp_ScaLAPACK,
       0,
       0,
       0,
       0,
/*34*/ MatDuplicate_ScaLAPACK,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_ScaLAPACK,
       0,
       0,
       0,
       MatCopy_ScaLAPACK,
/*44*/ 0,
       MatScale_ScaLAPACK,
       MatShift_ScaLAPACK,
       0,
       0,
/*49*/ 0,
       0,
       0,
       0,
       0,
/*54*/ 0,
       0,
       0,
       0,
       0,
/*59*/ 0,
       MatDestroy_ScaLAPACK,
       MatView_ScaLAPACK,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ 0,
       0,
       MatConvert_ScaLAPACK_Dense,
       0,
       0,
/*74*/ 0,
       0,
       0,
       0,
       0,
/*79*/ 0,
       0,
       0,
       0,
       MatLoad_ScaLAPACK,
/*84*/ 0,
       0,
       0,
       0,
       0,
/*89*/ 0,
       0,
       MatMatMultNumeric_ScaLAPACK,
       0,
       0,
/*94*/ 0,
       0,
       0,
       MatMatTransposeMultNumeric_ScaLAPACK,
       0,
/*99*/ MatProductSetFromOptions_ScaLAPACK,
       0,
       0,
       MatConjugate_ScaLAPACK,
       0,
/*104*/0,
       0,
       0,
       0,
       0,
/*109*/MatMatSolve_ScaLAPACK,
       0,
       0,
       0,
       MatMissingDiagonal_ScaLAPACK,
/*114*/0,
       0,
       0,
       0,
       0,
/*119*/0,
       MatHermitianTranspose_ScaLAPACK,
       0,
       0,
       0,
/*124*/0,
       0,
       0,
       0,
       0,
/*129*/0,
       0,
       0,
       0,
       0,
/*134*/0,
       0,
       0,
       0,
       0,
       0,
/*140*/0,
       0,
       0,
       0,
       0,
/*145*/0,
       0,
       0
};
1462
/*
   Begin scattering stashed (row,col,value) triples to their owning processes.
   Unlike the generic stash scatter, ownership here is determined by the 2d
   block-cyclic distribution (via infog2l + Cblacs_pnum), not by row ranges.
   Two messages are sent per destination: one with the indices (row then col)
   and one with the values. The receives are posted first as nonblocking.
*/
static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;
  PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
  Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

  PetscFunctionBegin;
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr);
  ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr);

  i = j = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    for (l=0; l<space->local_used; l++) {
      /* map each stashed global (row,col) to the owning process in the grid */
      ierr = PetscBLASIntCast(space->idx[l]+1,&gridx);CHKERRQ(ierr);
      ierr = PetscBLASIntCast(space->idy[l]+1,&gcidx);CHKERRQ(ierr);
      PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
      /* Cblacs_pnum converts grid coordinates to the process rank */
      j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
      nlengths[j]++; owner[i] = j;
      i++;
    }
    space = space_next;
  }

  /* Now check what procs get messages - and compute nsends. */
  ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr);
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }

  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, their lengths, from from-ids */
   ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr);
   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
   /* since clubbing row,col - lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *=2;
   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
   /* values are size 'bs2' lengths (and remove earlier factor 2 */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
   ierr = PetscFree(onodes);CHKERRQ(ierr);
   ierr = PetscFree(olengths);CHKERRQ(ierr);}

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  /* pack the send buffers: values into svalues, row indices into the first
     half and column indices into the second half of each destination's
     segment of sindices */
  i = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx = space->idx;
    sp_idy = space->idy;
    sp_val = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]] = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  /* recompute the segment starts, consumed by the packing loop above */
  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
      ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_USE_INFO)
  ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr);
    }
  }
#endif
  ierr = PetscFree(nlengths);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree2(startv,starti);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
  ierr = PetscFree(recv_waits2);CHKERRQ(ierr);

  /* hand ownership of the buffers and requests to the stash */
  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  PetscFunctionReturn(0);
}
1613
MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)1614 static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1615 {
1616 PetscErrorCode ierr;
1617 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1618
1619 PetscFunctionBegin;
1620 if (A->preallocated) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot change block sizes after MatSetUp");
1621 if (mb<1 && mb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"mb %D must be at least 1",mb);
1622 if (nb<1 && nb!=PETSC_DECIDE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nb %D must be at least 1",nb);
1623 ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
1624 ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
1625 PetscFunctionReturn(0);
1626 }
1627
/*@
   MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
   the ScaLAPACK matrix

   Logically Collective on A

   Input Parameters:
+  A  - a MATSCALAPACK matrix
.  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* block sizes are logically collective: all ranks of A's communicator must pass the same values */
  PetscValidLogicalCollectiveInt(A,mb,2);
  PetscValidLogicalCollectiveInt(A,nb,3);
  /* dispatch to the type-specific implementation; PetscTryMethod is a no-op if A is not MATSCALAPACK */
  ierr = PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1654
MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt * mb,PetscInt * nb)1655 static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1656 {
1657 Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1658
1659 PetscFunctionBegin;
1660 if (mb) *mb = a->mb;
1661 if (nb) *nb = a->nb;
1662 PetscFunctionReturn(0);
1663 }
1664
/*@
   MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
   the ScaLAPACK matrix

   Not collective

   Input Parameter:
.  A - a MATSCALAPACK matrix

   Output Parameters:
+  mb - the row block size
-  nb - the column block size

   Level: intermediate

.seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
@*/
PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation; PetscUseMethod errors if A is not MATSCALAPACK */
  ierr = PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1691
1692 PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1693 PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1694
1695 /*MC
1696 MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1697
1698 Use ./configure --download-scalapack to install PETSc to use ScaLAPACK
1699
1700 Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1701
1702 Options Database Keys:
1703 + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1704 . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1705 - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1706
1707 Level: beginner
1708
1709 .seealso: MATDENSE, MATELEMENTAL
1710 M*/
1711
MatCreate_ScaLAPACK(Mat A)1712 PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1713 {
1714 Mat_ScaLAPACK *a;
1715 PetscErrorCode ierr;
1716 PetscBool flg,flg1;
1717 Mat_ScaLAPACK_Grid *grid;
1718 MPI_Comm icomm;
1719 PetscBLASInt nprow,npcol,myrow,mycol;
1720 PetscInt optv1,k=2,array[2]={0,0};
1721 PetscMPIInt size;
1722
1723 PetscFunctionBegin;
1724 ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1725 A->insertmode = NOT_SET_VALUES;
1726
1727 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);CHKERRQ(ierr);
1728 A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1729 A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1730 A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1731 A->stash.ScatterDestroy = NULL;
1732
1733 ierr = PetscNewLog(A,&a);CHKERRQ(ierr);
1734 A->data = (void*)a;
1735
1736 /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1737 if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1738 ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);CHKERRQ(ierr);
1739 }
1740 ierr = PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);CHKERRQ(ierr);
1741 ierr = MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);CHKERRQ(ierr);
1742 if (!flg) {
1743 ierr = PetscNewLog(A,&grid);CHKERRQ(ierr);
1744
1745 ierr = MPI_Comm_size(icomm,&size);CHKERRQ(ierr);
1746 grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1747
1748 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");CHKERRQ(ierr);
1749 ierr = PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);CHKERRQ(ierr);
1750 if (flg1) {
1751 if (size % optv1) SETERRQ2(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_INCOMP,"Grid Height %D must evenly divide CommSize %D",optv1,size);
1752 grid->nprow = optv1;
1753 }
1754 ierr = PetscOptionsEnd();CHKERRQ(ierr);
1755
1756 if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1757 grid->npcol = size/grid->nprow;
1758 ierr = PetscBLASIntCast(grid->nprow,&nprow);CHKERRQ(ierr);
1759 ierr = PetscBLASIntCast(grid->npcol,&npcol);CHKERRQ(ierr);
1760 Cblacs_get(-1,0,&grid->ictxt);
1761 Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1762 Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1763 grid->grid_refct = 1;
1764 grid->nprow = nprow;
1765 grid->npcol = npcol;
1766 grid->myrow = myrow;
1767 grid->mycol = mycol;
1768 /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1769 Cblacs_get(-1,0,&grid->ictxrow);
1770 Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1771 Cblacs_get(-1,0,&grid->ictxcol);
1772 Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1773 ierr = MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);CHKERRQ(ierr);
1774
1775 } else grid->grid_refct++;
1776 ierr = PetscCommDestroy(&icomm);CHKERRQ(ierr);
1777 a->grid = grid;
1778 a->mb = DEFAULT_BLOCKSIZE;
1779 a->nb = DEFAULT_BLOCKSIZE;
1780
1781 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");CHKERRQ(ierr);
1782 ierr = PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);CHKERRQ(ierr);
1783 if (flg) {
1784 a->mb = array[0];
1785 a->nb = (k>1)? array[1]: a->mb;
1786 }
1787 ierr = PetscOptionsEnd();CHKERRQ(ierr);
1788
1789 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);CHKERRQ(ierr);
1790 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1791 ierr = PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);CHKERRQ(ierr);
1792 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);CHKERRQ(ierr);
1793 PetscFunctionReturn(0);
1794 }
1795
1796 /*@C
1797 MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1798 (2D block cyclic distribution).
1799
1800 Collective
1801
1802 Input Parameters:
1803 + comm - MPI communicator
1804 . mb - row block size (or PETSC_DECIDE to have it set)
1805 . nb - column block size (or PETSC_DECIDE to have it set)
1806 . M - number of global rows
1807 . N - number of global columns
1808 . rsrc - coordinate of process that owns the first row of the distributed matrix
1809 - csrc - coordinate of process that owns the first column of the distributed matrix
1810
1811 Output Parameter:
1812 . A - the matrix
1813
1814 Options Database Keys:
1815 . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1816
1817 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1818 MatXXXXSetPreallocation() paradigm instead of this routine directly.
1819 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1820
1821 Notes:
1822 If PETSC_DECIDE is used for the block sizes, then an appropriate value
1823 is chosen.
1824
1825 Storage Information:
   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1827 configured with ScaLAPACK. In particular, PETSc's local sizes lose
1828 significance and are thus ignored. The block sizes refer to the values
1829 used for the distributed matrix, not the same meaning as in BAIJ.
1830
1831 Level: intermediate
1832
1833 .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1834 @*/
PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
{
  PetscErrorCode ierr;
  Mat_ScaLAPACK  *a;
  PetscInt       m,n;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATSCALAPACK);CHKERRQ(ierr);
  /* global dimensions must be explicit: the 2d block-cyclic layout cannot infer them */
  if (M==PETSC_DECIDE || N==PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot use PETSC_DECIDE for matrix dimensions");
  /* rows and columns are NOT distributed according to PetscSplitOwnership */
  m = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&m,&M);CHKERRQ(ierr);
  n = PETSC_DECIDE;
  ierr = PetscSplitOwnershipEqual(comm,&n,&N);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  a = (Mat_ScaLAPACK*)(*A)->data;
  /* record the ScaLAPACK descriptor parameters as BLAS-width integers */
  ierr = PetscBLASIntCast(M,&a->M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(N,&a->N);CHKERRQ(ierr);
  /* PETSC_DECIDE block sizes: mb defaults to DEFAULT_BLOCKSIZE, nb defaults to mb */
  ierr = PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);CHKERRQ(ierr);
  ierr = PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);CHKERRQ(ierr);
  /* rsrc/csrc: grid coordinates of the process owning the first row/column */
  ierr = PetscBLASIntCast(rsrc,&a->rsrc);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(csrc,&a->csrc);CHKERRQ(ierr);
  ierr = MatSetUp(*A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1861
1862