/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */
/*
   MatSetUpMultiply_MPIBAIJ - Builds the data structures needed by the parallel
   (block) matrix-vector product.

   Determines which global block columns of the off-diagonal part B actually
   contain nonzeros, compacts B's column indices to a local numbering over just
   those columns, records the compacted-local-to-global block-column map in
   baij->garray, and creates the scatter context baij->Mvctx together with the
   sequential work vector baij->lvec used to gather the needed off-process
   vector entries during MatMult().
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;   /* ec counts distinct nonzero block columns of B */
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;                   /* hash table: global block col (1-based) -> local id (1-based) */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;              /* dense marker array over all global block columns */
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      /* keys and values are shifted by one so that 0 can mean "not present" */
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                     /* undo the one-based shift */
    garray[lid] = gid;
  }
  /* sort so garray is in increasing global order, then rebuild the table with
     local ids that match the sorted positions */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B: rewrite B's column indices in the
     compacted local numbering */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  /* shrink B's column layout to the ec compacted block columns */
  B->nbs = ec;
  ierr   = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr);
  ierr   = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need; scanning in increasing i keeps garray sorted */
  ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  /* shrink B's column layout to the ec compacted block columns */
  B->nbs = ec;
  ierr   = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr);
  ierr   = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather:
     "from" addresses the needed global block columns, "to" is 0..ec-1 in lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context; NULL array
     since only the layout is needed to build the scatter */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  /* ownership of garray transfers to the matrix; freed in MatDisAssemble/MatDestroy */
  baij->garray = garray;

  ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
130
131 /*
132 Takes the local part of an already assembled MPIBAIJ matrix
133 and disassembles it. This is to allow new nonzeros into the matrix
134 that require more communication in the matrix vector multiply.
135 Thus certain data-structures must be rebuilt.
136
137 Kind of slow! But that's what application programmers get when
138 they are sloppy.
139 */
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij  = (Mat_MPIBAIJ*)A->data;
  Mat            B      = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar      *a = Bbaij->a;
  MatScalar      *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = NULL;
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = NULL;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over: Bnew has full global column width n
     (uncompacted) so new off-process nonzeros can be inserted */
  ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];   /* exact per-block-row preallocation */
  }
  ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
  }

  /* values copied below are stored column-major per block (bs2 entries) */
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;

  /* copy each block, translating compacted local column ids back to global via garray */
  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j*bs2;
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);

  baij->B = Bnew;
  /* mark A unassembled so the multiply structures get rebuilt at next assembly */
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
210
/* ugly stuff added for Glenn someday we should fix this up */
/* NOTE(review): file-scope statics shared by the DiagonalScaleLocal routines
   below; set up lazily by MatMPIBAIJDiagonalScaleLocalSetUp() and apparently
   never freed here — presumably cleaned up elsewhere, or leaked; confirm. */

static PetscInt *uglyrmapd = NULL,*uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      uglydd     = NULL,uglyoo     = NULL; /* work vectors used to scale the two parts of the local matrix */
215
216
MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)217 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
218 {
219 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
220 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data;
221 PetscErrorCode ierr;
222 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
223 PetscInt *r_rmapd,*r_rmapo;
224
225 PetscFunctionBegin;
226 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
227 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
228 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr);
229 nt = 0;
230 for (i=0; i<inA->rmap->mapping->n; i++) {
231 if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) {
232 nt++;
233 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
234 }
235 }
236 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
237 ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr);
238 for (i=0; i<inA->rmap->mapping->n; i++) {
239 if (r_rmapd[i]) {
240 for (j=0; j<bs; j++) {
241 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
242 }
243 }
244 }
245 ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
246 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
247
248 ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr);
249 for (i=0; i<B->nbs; i++) {
250 lindices[garray[i]] = i+1;
251 }
252 no = inA->rmap->mapping->n - nt;
253 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr);
254 nt = 0;
255 for (i=0; i<inA->rmap->mapping->n; i++) {
256 if (lindices[inA->rmap->mapping->indices[i]]) {
257 nt++;
258 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
259 }
260 }
261 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
262 ierr = PetscFree(lindices);CHKERRQ(ierr);
263 ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr);
264 for (i=0; i<inA->rmap->mapping->n; i++) {
265 if (r_rmapo[i]) {
266 for (j=0; j<bs; j++) {
267 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
268 }
269 }
270 }
271 ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
272 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
273 PetscFunctionReturn(0);
274 }
275
MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)276 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
277 {
278 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
279 PetscErrorCode ierr;
280
281 PetscFunctionBegin;
282 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
283 PetscFunctionReturn(0);
284 }
285
MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)286 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
287 {
288 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
289 PetscErrorCode ierr;
290 PetscInt n,i;
291 PetscScalar *d,*o;
292 const PetscScalar *s;
293
294 PetscFunctionBegin;
295 if (!uglyrmapd) {
296 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
297 }
298
299 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr);
300
301 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
302 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
303 for (i=0; i<n; i++) {
304 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
305 }
306 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
307 /* column scale "diagonal" portion of local matrix */
308 ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr);
309
310 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
311 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
312 for (i=0; i<n; i++) {
313 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
314 }
315 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr);
316 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
317 /* column scale "off-diagonal" portion of local matrix */
318 ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr);
319 PetscFunctionReturn(0);
320 }
321