1 
2 /*
3    Support for the parallel BAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/baij/mpi/mpibaij.h>
6 #include <petsc/private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */
7 
MatSetUpMultiply_MPIBAIJ(Mat mat)8 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
9 {
10   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
11   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
12   PetscErrorCode ierr;
13   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
14   PetscInt       bs = mat->rmap->bs,*stmp;
15   IS             from,to;
16   Vec            gvec;
17 #if defined(PETSC_USE_CTABLE)
18   PetscTable         gid1_lid1;
19   PetscTablePosition tpos;
20   PetscInt           gid,lid;
21 #else
22   PetscInt Nbs = baij->Nbs,*indices;
23 #endif
24 
25   PetscFunctionBegin;
26 #if defined(PETSC_USE_CTABLE)
27   /* use a table - Mark Adams */
28   ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
29   for (i=0; i<B->mbs; i++) {
30     for (j=0; j<B->ilen[i]; j++) {
31       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
32       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
33       if (!data) {
34         /* one based table */
35         ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
36       }
37     }
38   }
39   /* form array of columns we need */
40   ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
41   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
42   while (tpos) {
43     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
44     gid--; lid--;
45     garray[lid] = gid;
46   }
47   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
48   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
49   for (i=0; i<ec; i++) {
50     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
51   }
52   /* compact out the extra columns in B */
53   for (i=0; i<B->mbs; i++) {
54     for (j=0; j<B->ilen[i]; j++) {
55       PetscInt gid1 = aj[B->i[i] + j] + 1;
56       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
57       lid--;
58       aj[B->i[i]+j] = lid;
59     }
60   }
61   B->nbs           = ec;
62   ierr = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr);
63   ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr);
64   ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
65 #else
66   /* Make an array as long as the number of columns */
67   /* mark those columns that are in baij->B */
68   ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr);
69   for (i=0; i<B->mbs; i++) {
70     for (j=0; j<B->ilen[i]; j++) {
71       if (!indices[aj[B->i[i] + j]]) ec++;
72       indices[aj[B->i[i] + j]] = 1;
73     }
74   }
75 
76   /* form array of columns we need */
77   ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
78   ec   = 0;
79   for (i=0; i<Nbs; i++) {
80     if (indices[i]) {
81       garray[ec++] = i;
82     }
83   }
84 
85   /* make indices now point into garray */
86   for (i=0; i<ec; i++) {
87     indices[garray[i]] = i;
88   }
89 
90   /* compact out the extra columns in B */
91   for (i=0; i<B->mbs; i++) {
92     for (j=0; j<B->ilen[i]; j++) {
93       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
94     }
95   }
96   B->nbs           = ec;
97   ierr = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr);
98   ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr);
99   ierr = PetscFree(indices);CHKERRQ(ierr);
100 #endif
101 
102   /* create local vector that is used to scatter into */
103   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
104 
105   /* create two temporary index sets for building scatter-gather */
106   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
107 
108   ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr);
109   for (i=0; i<ec; i++) stmp[i] = i;
110   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);
111 
112   /* create temporary global vector to generate scatter context */
113   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
114 
115   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
116 
117   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
118   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
119   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
120   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
121 
122   baij->garray = garray;
123 
124   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
125   ierr = ISDestroy(&from);CHKERRQ(ierr);
126   ierr = ISDestroy(&to);CHKERRQ(ierr);
127   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 /*
132      Takes the local part of an already assembled MPIBAIJ matrix
133    and disassembles it. This is to allow new nonzeros into the matrix
134    that require more communication in the matrix vector multiply.
135    Thus certain data-structures must be rebuilt.
136 
137    Kind of slow! But that's what application programmers get when
138    they are sloppy.
139 */
MatDisAssemble_MPIBAIJ(Mat A)140 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
141 {
142   Mat_MPIBAIJ    *baij  = (Mat_MPIBAIJ*)A->data;
143   Mat            B      = baij->B,Bnew;
144   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
145   PetscErrorCode ierr;
146   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
147   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
148   MatScalar      *a  = Bbaij->a;
149   MatScalar      *atmp;
150 
151 
152   PetscFunctionBegin;
153   /* free stuff related to matrix-vec multiply */
154   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
155   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = NULL;
156   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = NULL;
157   if (baij->colmap) {
158 #if defined(PETSC_USE_CTABLE)
159     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
160 #else
161     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
162     ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
163 #endif
164   }
165 
166   /* make sure that B is assembled so we can access its values */
167   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
168   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
169 
170   /* invent new B and copy stuff over */
171   ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
172   for (i=0; i<mbs; i++) {
173     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
174   }
175   ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr);
176   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
177   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
178   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
179   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
180     ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
181   }
182 
183   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
184   /*
185    Ensure that B's nonzerostate is monotonically increasing.
186    Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call?
187    */
188   Bnew->nonzerostate = B->nonzerostate;
189 
190   for (i=0; i<mbs; i++) {
191     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
192       col  = garray[Bbaij->j[j]];
193       atmp = a + j*bs2;
194       ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
195     }
196   }
197   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
198 
199   ierr = PetscFree(nz);CHKERRQ(ierr);
200   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
201   ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
202   ierr = MatDestroy(&B);CHKERRQ(ierr);
203   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);
204 
205   baij->B          = Bnew;
206   A->was_assembled = PETSC_FALSE;
207   A->assembled     = PETSC_FALSE;
208   PetscFunctionReturn(0);
209 }
210 
211 /*      ugly stuff added for Glenn someday we should fix this up */
212 
213 static PetscInt *uglyrmapd = NULL,*uglyrmapo = NULL;  /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
214 static Vec      uglydd     = NULL,uglyoo     = NULL;  /* work vectors used to scale the two parts of the local matrix */
215 
216 
MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)217 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
218 {
219   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
220   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)ina->B->data;
221   PetscErrorCode ierr;
222   PetscInt       bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
223   PetscInt       *r_rmapd,*r_rmapo;
224 
225   PetscFunctionBegin;
226   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
227   ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
228   ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr);
229   nt   = 0;
230   for (i=0; i<inA->rmap->mapping->n; i++) {
231     if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) {
232       nt++;
233       r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
234     }
235   }
236   if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
237   ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr);
238   for (i=0; i<inA->rmap->mapping->n; i++) {
239     if (r_rmapd[i]) {
240       for (j=0; j<bs; j++) {
241         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
242       }
243     }
244   }
245   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
246   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
247 
248   ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr);
249   for (i=0; i<B->nbs; i++) {
250     lindices[garray[i]] = i+1;
251   }
252   no   = inA->rmap->mapping->n - nt;
253   ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr);
254   nt   = 0;
255   for (i=0; i<inA->rmap->mapping->n; i++) {
256     if (lindices[inA->rmap->mapping->indices[i]]) {
257       nt++;
258       r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
259     }
260   }
261   if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
262   ierr = PetscFree(lindices);CHKERRQ(ierr);
263   ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr);
264   for (i=0; i<inA->rmap->mapping->n; i++) {
265     if (r_rmapo[i]) {
266       for (j=0; j<bs; j++) {
267         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
268       }
269     }
270   }
271   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
272   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)276 PetscErrorCode  MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
277 {
278   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
279   PetscErrorCode ierr;
280 
281   PetscFunctionBegin;
282   ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
283   PetscFunctionReturn(0);
284 }
285 
MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)286 PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
287 {
288   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
289   PetscErrorCode    ierr;
290   PetscInt          n,i;
291   PetscScalar       *d,*o;
292   const PetscScalar *s;
293 
294   PetscFunctionBegin;
295   if (!uglyrmapd) {
296     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
297   }
298 
299   ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr);
300 
301   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
302   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
303   for (i=0; i<n; i++) {
304     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
305   }
306   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
307   /* column scale "diagonal" portion of local matrix */
308   ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr);
309 
310   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
311   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
312   for (i=0; i<n; i++) {
313     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
314   }
315   ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr);
316   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
317   /* column scale "off-diagonal" portion of local matrix */
318   ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321