#include <petsc/private/matimpl.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>

/* linked list methods
 *
 *  PetscCDCreate
 */
PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
{
  PetscErrorCode   ierr;
  PetscCoarsenData *ail;

  PetscFunctionBegin;
  /* allocate pool, partially */
  ierr                 = PetscNew(&ail);CHKERRQ(ierr);
  *a_out               = ail;
  ail->pool_list.next  = NULL;
  ail->pool_list.array = NULL;
  ail->chk_sz          = 0;
  /* allocate array */
  ail->size            = a_size;
  ierr                 = PetscCalloc1(a_size, &ail->array);CHKERRQ(ierr);
  ail->extra_nodes     = NULL;
  ail->mat             = NULL;
  PetscFunctionReturn(0);
}
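
/* A minimal usage sketch (illustrative only, not part of the library; `nloc` and
 * `my0` stand in for the local size and first global index):
 *
 *   PetscCoarsenData *agg = NULL;
 *   PetscCDIntNd     *pos;
 *   PetscInt         gid;
 *   ierr = PetscCDCreate(nloc, &agg);CHKERRQ(ierr);     // one list head per local vertex
 *   ierr = PetscCDSetChuckSize(agg, 100);CHKERRQ(ierr); // optional: pool chunk size
 *   ierr = PetscCDAppendID(agg, 0, my0);CHKERRQ(ierr);  // append global id my0 to list 0
 *   ierr = PetscCDGetHeadPos(agg, 0, &pos);CHKERRQ(ierr);
 *   while (pos) {
 *     ierr = PetscCDIntNdGetID(pos, &gid);CHKERRQ(ierr);
 *     ierr = PetscCDGetNextPos(agg, 0, &pos);CHKERRQ(ierr);
 *   }
 *   ierr = PetscCDDestroy(agg);CHKERRQ(ierr);
 */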

/* PetscCDDestroy
 */
PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
{
  PetscErrorCode ierr;
  PetscCDArrNd   *n = &ail->pool_list;

  PetscFunctionBegin;
  n = n->next;
  while (n) {
    PetscCDArrNd *lstn = n;
    n    = n->next;
    ierr = PetscFree(lstn);CHKERRQ(ierr);
  }
  if (ail->pool_list.array) {
    ierr = PetscFree(ail->pool_list.array);CHKERRQ(ierr);
  }
  ierr = PetscFree(ail->array);CHKERRQ(ierr);
  /* delete this (+agg+pool array) */
  ierr = PetscFree(ail);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* PetscCDSetChuckSize
 */
PetscErrorCode PetscCDSetChuckSize(PetscCoarsenData *ail, PetscInt a_sz)
{
  PetscFunctionBegin;
  ail->chk_sz = a_sz;
  PetscFunctionReturn(0);
}

/*  PetscCDGetNewNode
 */
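/* Nodes come from two places: a free list of recycled nodes (ail->extra_nodes)
   and a bump allocator over chunk arrays of ail->chk_sz nodes, chained on
   ail->pool_list so PetscCDDestroy() can release them all at once. */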
PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *a_out = NULL;                /* squelch -Wmaybe-uninitialized */
  if (ail->extra_nodes) {
    PetscCDIntNd *node = ail->extra_nodes;
    ail->extra_nodes = node->next;
    node->gid        = a_id;
    node->next       = NULL;
    *a_out           = node;
  } else {
    if (!ail->chk_sz) ail->chk_sz = 10; /* use a chuck size of ail->size? */
    if (!ail->pool_list.array) {
      ierr                = PetscMalloc1(ail->chk_sz, &ail->pool_list.array);CHKERRQ(ierr);
      ail->new_node       = ail->pool_list.array;
      ail->new_left       = ail->chk_sz;
      ail->new_node->next = NULL;
    } else if (!ail->new_left) {
      PetscCDArrNd *node;
      ierr                = PetscMalloc(ail->chk_sz*sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node);CHKERRQ(ierr);
      node->array         = (PetscCDIntNd*)(node + 1);
      node->next          = ail->pool_list.next;
      ail->pool_list.next = node;
      ail->new_left       = ail->chk_sz;
      ail->new_node       = node->array;
    }
    ail->new_node->gid  = a_id;
    ail->new_node->next = NULL;
    *a_out              = ail->new_node++; ail->new_left--;
  }
  PetscFunctionReturn(0);
}

/* PetscCDIntNdSetID
 */
PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
{
  PetscFunctionBegin;
  a_this->gid = a_id;
  PetscFunctionReturn(0);
}

/* PetscCDIntNdGetID
 */
PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
{
  PetscFunctionBegin;
  *a_gid = a_this->gid;
  PetscFunctionReturn(0);
}

/* PetscCDGetHeadPos
 */
PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
{
  PetscFunctionBegin;
  if (a_idx>=ail->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"a_idx >= ail->size: a_idx=%D.",a_idx);
  *pos = ail->array[a_idx];
  PetscFunctionReturn(0);
}

/* PetscCDGetNextPos
 */
PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
{
  PetscFunctionBegin;
  if (!(*pos)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"NULL input position.");
  *pos = (*pos)->next;
  PetscFunctionReturn(0);
}

/* PetscCDAppendID
 */
PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
{
  PetscErrorCode ierr;
  PetscCDIntNd   *n,*n2;

  PetscFunctionBegin;
  ierr = PetscCDGetNewNode(ail, &n, a_id);CHKERRQ(ierr);
  if (a_idx>=ail->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Index %D out of range.",a_idx);
  if (!(n2=ail->array[a_idx])) ail->array[a_idx] = n;
  else {
    do {
      if (!n2->next) {
        n2->next = n;
        if (n->next) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"n should not have a next");
        break;
      }
      n2 = n2->next;
    } while (n2);
    if (!n2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"n2 should be non-null");
  }
  PetscFunctionReturn(0);
}

/* PetscCDAppendNode
 */
PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n)
{
  PetscCDIntNd *n2;

  PetscFunctionBegin;
  if (a_idx>=ail->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Index %D out of range.",a_idx);
  if (!(n2=ail->array[a_idx])) ail->array[a_idx] = a_n;
  else {
    do {
      if (!n2->next) {
        n2->next  = a_n;
        a_n->next = NULL;
        break;
      }
      n2 = n2->next;
    } while (n2);
    if (!n2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"n2 should be non-null");
  }
  PetscFunctionReturn(0);
}

/* PetscCDRemoveNextNode: removes a_last->next; this exposes the singly linked list structure to the API
 */
PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last)
{
  PetscCDIntNd *del;

  PetscFunctionBegin;
  if (a_idx>=ail->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Index %D out of range.",a_idx);
  if (!a_last->next) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"a_last should have a next");
  del          = a_last->next;
  a_last->next = del->next;
  /* del->next = NULL; -- this is still used in an iterator, so keep it intact -- need to fix this with a doubly linked list */
  /* could reuse n2 but PetscCDAppendNode sometimes uses it */
  PetscFunctionReturn(0);
}

/* PetscCDPrint
 */
PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscCDIntNd   *n;
  PetscInt       ii,kk;
  PetscMPIInt    rank;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  for (ii=0; ii<ail->size; ii++) {
    kk = 0;
    n  = ail->array[ii];
    if (n) {ierr = PetscPrintf(comm,"[%d]%s list %d:\n",rank,PETSC_FUNCTION_NAME,ii);CHKERRQ(ierr);}
    while (n) {
      ierr = PetscPrintf(comm,"\t[%d] %D) id %D\n",rank,++kk,n->gid);CHKERRQ(ierr);
      n = n->next;
    }
  }
  PetscFunctionReturn(0);
}

/* PetscCDAppendRemove
 */
PetscErrorCode PetscCDAppendRemove(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
{
  PetscCDIntNd *n;

  PetscFunctionBegin;
  if (a_srcidx>=ail->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Index %D out of range.",a_srcidx);
  if (a_destidx>=ail->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Index %D out of range.",a_destidx);
  if (a_destidx==a_srcidx) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"a_destidx==a_srcidx %D.",a_destidx);
  n = ail->array[a_destidx];
  if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
  else {
    do {
      if (!n->next) {
        n->next = ail->array[a_srcidx];
        break;
      }
      n = n->next;
    } while (1);
  }
  ail->array[a_srcidx] = NULL;
  PetscFunctionReturn(0);
}

/* PetscCDRemoveAll
 */
PetscErrorCode PetscCDRemoveAll(PetscCoarsenData *ail, PetscInt a_idx)
{
  PetscCDIntNd *rem,*n1;

  PetscFunctionBegin;
  if (a_idx>=ail->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Index %D out of range.",a_idx);
  rem               = ail->array[a_idx];
  ail->array[a_idx] = NULL;
  if (!(n1=ail->extra_nodes)) ail->extra_nodes = rem;
  else {
    while (n1->next) n1 = n1->next;
    n1->next = rem;
  }
  PetscFunctionReturn(0);
}

/* PetscCDSizeAt
 */
PetscErrorCode PetscCDSizeAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
{
  PetscCDIntNd *n1;
  PetscInt     sz = 0;

  PetscFunctionBegin;
  if (a_idx>=ail->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Index %D out of range.",a_idx);
  n1 = ail->array[a_idx];
  while (n1) {
    n1 = n1->next;
    sz++;
  }
  *a_sz = sz;
  PetscFunctionReturn(0);
}

/* PetscCDEmptyAt
 */
PetscErrorCode PetscCDEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
{
  PetscFunctionBegin;
  if (a_idx>=ail->size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Index %D out of range.",a_idx);
  *a_e = (PetscBool)(ail->array[a_idx]==NULL);
  PetscFunctionReturn(0);
}

/* PetscCDGetMIS
 */
PetscErrorCode PetscCDGetMIS(PetscCoarsenData *ail, IS *a_mis)
{
  PetscErrorCode ierr;
  PetscCDIntNd   *n;
  PetscInt       ii,kk;
  PetscInt       *permute;

  PetscFunctionBegin;
  for (ii=kk=0; ii<ail->size; ii++) {
    n = ail->array[ii];
    if (n) kk++;
  }
  ierr = PetscMalloc1(kk, &permute);CHKERRQ(ierr);
  for (ii=kk=0; ii<ail->size; ii++) {
    n = ail->array[ii];
    if (n) permute[kk++] = ii;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* PetscCDGetMat
 */
PetscErrorCode PetscCDGetMat(const PetscCoarsenData *ail, Mat *a_mat)
{
  PetscFunctionBegin;
  *a_mat = ail->mat;
  PetscFunctionReturn(0);
}

/* PetscCDSetMat
 */
PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
{
  PetscFunctionBegin;
  ail->mat = a_mat;
  PetscFunctionReturn(0);
}

/* PetscCDGetASMBlocks
 */
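/* Builds one IS per nonempty aggregate (indices blown up by block size a_bs),
   plus, when present, one extra IS collecting rows of `mat` with fewer than two
   nonzeros, which are treated as boundary-condition rows. */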
PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, Mat mat, PetscInt *a_sz, IS **a_local_is)
{
  PetscErrorCode ierr;
  PetscCDIntNd   *n;
  PetscInt       lsz,ii,kk,*idxs,jj,s,e,gid;
  IS             *is_loc,is_bcs;

  PetscFunctionBegin;
  for (ii=kk=0; ii<ail->size; ii++) {
    if (ail->array[ii]) kk++;
  }
  /* count BCs */
  ierr = MatGetOwnershipRange(mat, &s, &e);CHKERRQ(ierr);
  for (gid=s,lsz=0; gid<e; gid++) {
    ierr = MatGetRow(mat,gid,&jj,NULL,NULL);CHKERRQ(ierr);
    if (jj<2) lsz++;
    ierr = MatRestoreRow(mat,gid,&jj,NULL,NULL);CHKERRQ(ierr);
  }
  if (lsz) {
    ierr = PetscMalloc1(a_bs*lsz, &idxs);CHKERRQ(ierr);
    for (gid=s,lsz=0; gid<e; gid++) {
      ierr = MatGetRow(mat,gid,&jj,NULL,NULL);CHKERRQ(ierr);
      if (jj<2) {
        for (jj=0; jj<a_bs; lsz++,jj++) idxs[lsz] = a_bs*gid + jj;
      }
      ierr = MatRestoreRow(mat,gid,&jj,NULL,NULL);CHKERRQ(ierr);
    }
    ierr = ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_bcs);CHKERRQ(ierr);
    *a_sz = kk + 1; /* out */
  } else {
    is_bcs = NULL;
    *a_sz  = kk; /* out */
  }
  ierr = PetscMalloc1(*a_sz, &is_loc);CHKERRQ(ierr);

  for (ii=kk=0; ii<ail->size; ii++) {
    for (lsz=0, n=ail->array[ii]; n; lsz++, n=n->next) /* void */;
    if (lsz) {
      ierr = PetscMalloc1(a_bs*lsz, &idxs);CHKERRQ(ierr);
      for (lsz=0, n=ail->array[ii]; n; n=n->next) {
        ierr = PetscCDIntNdGetID(n, &gid);CHKERRQ(ierr);
        for (jj=0; jj<a_bs; lsz++,jj++) idxs[lsz] = a_bs*gid + jj;
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]);CHKERRQ(ierr);
    }
  }
  if (is_bcs) {
    is_loc[kk++] = is_bcs;
  }
  if (*a_sz != kk) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"*a_sz %D != kk %D",*a_sz,kk);
  *a_local_is = is_loc; /* out */

  PetscFunctionReturn(0);
}

/* ********************************************************************** */
/* edge for priority queue */
typedef struct edge_tag {
  PetscReal weight;
  PetscInt  lid0,gid1,cpid1;
} Edge;

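/* qsort() comparator used below; note the inverted sign so that edges are
   visited heaviest-first, which is what greedy HEM requires. */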
static int gamg_hem_compare(const void *a, const void *b)
{
  PetscReal va = ((Edge*)a)->weight, vb = ((Edge*)b)->weight;
  return (va < vb) ? 1 : (va == vb) ? 0 : -1; /* 0 for equal */
}

/* -------------------------------------------------------------------------- */
/*
   heavyEdgeMatchAgg - parallel heavy edge matching (HEM). MatAIJ specific!!!

   Input Parameters:
   . perm - permutation
   . a_Gmat - global matrix of graph (data not defined)

   Output Parameter:
   . a_locals_llist - array of lists of local nodes rooted at each local node
*/
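/* Outline (descriptive note, added for orientation): each of the n_iter passes
   (1) computes, for every vertex, the heaviest incident edge weight and the
   largest owning rank among tied ghost edges, (2) sorts this rank's edges
   heaviest-first, (3) greedily matches pairs whose edge is locally maximal,
   deferring to the larger rank on ties across processes, (4) exchanges lists of
   deleted ghost vertices, and (5) coarsens the graph via cMat <- P^T cMat P. */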
static PetscErrorCode heavyEdgeMatchAgg(IS perm, Mat a_Gmat, PetscCoarsenData **a_locals_llist)
{
  PetscErrorCode   ierr;
  PetscBool        isMPI;
  MPI_Comm         comm;
  PetscInt         sub_it,kk,n,ix,*idx,*ii,iter,Iend,my0;
  PetscMPIInt      rank,size;
  const PetscInt   nloc = a_Gmat->rmap->n,n_iter=6; /* need to figure out how to stop this */
  PetscInt         *lid_cprowID,*lid_gid;
  PetscBool        *lid_matched;
  Mat_SeqAIJ       *matA, *matB=NULL;
  Mat_MPIAIJ       *mpimat     =NULL;
  PetscScalar      one         =1.;
  PetscCoarsenData *agg_llists = NULL,*deleted_list = NULL;
  Mat              cMat,tMat,P;
  MatScalar        *ap;
  PetscMPIInt      tag1,tag2;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)a_Gmat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(a_Gmat, &my0, &Iend);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(comm, &tag1);CHKERRQ(ierr);
  ierr = PetscCommGetNewTag(comm, &tag2);CHKERRQ(ierr);

  ierr = PetscMalloc1(nloc, &lid_gid);CHKERRQ(ierr); /* explicit array needed */
  ierr = PetscMalloc1(nloc, &lid_cprowID);CHKERRQ(ierr);
  ierr = PetscMalloc1(nloc, &lid_matched);CHKERRQ(ierr);

  ierr = PetscCDCreate(nloc, &agg_llists);CHKERRQ(ierr);
  /* ierr = PetscCDSetChuckSize(agg_llists, nloc+1);CHKERRQ(ierr); */
  *a_locals_llist = agg_llists;
  ierr            = PetscCDCreate(size, &deleted_list);CHKERRQ(ierr);
  ierr            = PetscCDSetChuckSize(deleted_list, 100);CHKERRQ(ierr);
  /* setup 'lid_gid' for scatters and add self to all lists */
  for (kk=0; kk<nloc; kk++) {
    lid_gid[kk] = kk + my0;
    ierr        = PetscCDAppendID(agg_llists, kk, my0+kk);CHKERRQ(ierr);
  }

  /* make a copy of the graph, this gets destroyed in iterates */
  ierr = MatDuplicate(a_Gmat,MAT_COPY_VALUES,&cMat);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)a_Gmat, MATMPIAIJ, &isMPI);CHKERRQ(ierr);
  iter = 0;
  while (iter++ < n_iter) {
    PetscScalar    *cpcol_gid,*cpcol_max_ew,*cpcol_max_pe,*lid_max_ew;
    PetscBool      *cpcol_matched;
    PetscMPIInt    *cpcol_pe,proc;
    Vec            locMaxEdge,locMaxPE,ghostMaxEdge,ghostMaxPE;
    PetscInt       nEdges,n_nz_row,jj;
    Edge           *Edges;
    PetscInt       gid;
    const PetscInt *perm_ix, n_sub_its = 120;

    /* get submatrices of cMat */
    if (isMPI) {
      mpimat = (Mat_MPIAIJ*)cMat->data;
      matA   = (Mat_SeqAIJ*)mpimat->A->data;
      matB   = (Mat_SeqAIJ*)mpimat->B->data;
      if (!matB->compressedrow.use) {
        /* force construction of compressed row data structure since code below requires it */
        ierr = MatCheckCompressedRow(mpimat->B,matB->nonzerorowcnt,&matB->compressedrow,matB->i,mpimat->B->rmap->n,-1.0);CHKERRQ(ierr);
      }
    } else {
      matA = (Mat_SeqAIJ*)cMat->data;
    }

    /* set max edge on nodes */
    ierr = MatCreateVecs(cMat, &locMaxEdge, NULL);CHKERRQ(ierr);
    ierr = MatCreateVecs(cMat, &locMaxPE, NULL);CHKERRQ(ierr);

    /* get 'cpcol_pe' & 'cpcol_gid' & init. 'cpcol_matched' using 'mpimat->lvec' */
    if (mpimat) {
      Vec         vec;
      PetscScalar vval;

      ierr = MatCreateVecs(cMat, &vec, NULL);CHKERRQ(ierr);
      /* cpcol_pe */
      vval = (PetscScalar)(rank);
      for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
        ierr = VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
      }
      ierr = VecAssemblyBegin(vec);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(vec);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); /* get proc ID in 'cpcol_gid' */
      ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr);
      ierr = PetscMalloc1(n, &cpcol_pe);CHKERRQ(ierr);
      for (kk=0; kk<n; kk++) cpcol_pe[kk] = (PetscMPIInt)PetscRealPart(cpcol_gid[kk]);
      ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr);

      /* cpcol_gid */
      for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
        vval = (PetscScalar)(gid);
        ierr = VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
      }
      ierr = VecAssemblyBegin(vec);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(vec);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecDestroy(&vec);CHKERRQ(ierr);
      ierr = VecGetArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr); /* get GID in 'cpcol_gid' */

      /* cpcol_matched */
      ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr);
      ierr = PetscMalloc1(n, &cpcol_matched);CHKERRQ(ierr);
      for (kk=0; kk<n; kk++) cpcol_matched[kk] = PETSC_FALSE;
    }

    /* need an inverse map - locals */
    for (kk=0; kk<nloc; kk++) lid_cprowID[kk] = -1;
    /* set index into compressed row 'lid_cprowID' */
    if (matB) {
      for (ix=0; ix<matB->compressedrow.nrows; ix++) {
        lid_cprowID[matB->compressedrow.rindex[ix]] = ix;
      }
    }

    /* compute 'locMaxEdge' & 'locMaxPE', create list of edges, count edges */
    for (nEdges=0,kk=0,gid=my0; kk<nloc; kk++,gid++) {
      PetscReal   max_e = 0., tt;
      PetscScalar vval;
      PetscInt    lid   = kk;
      PetscMPIInt max_pe=rank,pe;

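      /* (added note) AIJ CSR traversal idiom used throughout this file:
         ii = row-offset array, idx = column indices of row lid, ap = values;
         ghost (off-diagonal B) rows are addressed via the compressed row index ix. */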
      ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
      ap = matA->a + ii[lid];
      for (jj=0; jj<n; jj++) {
        PetscInt lidj = idx[jj];
        if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
        if (lidj > lid) nEdges++;
      }
      if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
        ap  = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        for (jj=0; jj<n; jj++) {
          if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
          nEdges++;
          if ((pe=cpcol_pe[idx[jj]]) > max_pe) max_pe = pe;
        }
      }
      vval = max_e;
      ierr = VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr);

      vval = (PetscScalar)max_pe;
      ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = VecAssemblyBegin(locMaxEdge);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(locMaxEdge);CHKERRQ(ierr);
    ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr);
    ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr);

    /* get 'cpcol_max_ew' & 'cpcol_max_pe' */
    if (mpimat) {
      ierr = VecDuplicate(mpimat->lvec, &ghostMaxEdge);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);

      ierr = VecDuplicate(mpimat->lvec, &ghostMaxPE);CHKERRQ(ierr);
      ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);
    }

    /* setup sorted list of edges */
    ierr = PetscMalloc1(nEdges, &Edges);CHKERRQ(ierr);
    ierr = ISGetIndices(perm, &perm_ix);CHKERRQ(ierr);
    for (nEdges=n_nz_row=kk=0; kk<nloc; kk++) {
      PetscInt nn, lid = perm_ix[kk];
      ii = matA->i; nn = n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
      ap = matA->a + ii[lid];
      for (jj=0; jj<n; jj++) {
        PetscInt lidj = idx[jj];
        if (lidj > lid) {
          Edges[nEdges].lid0   = lid;
          Edges[nEdges].gid1   = lidj + my0;
          Edges[nEdges].cpid1  = -1;
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
        ap  = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        nn += n;
        for (jj=0; jj<n; jj++) {
          Edges[nEdges].lid0   = lid;
          Edges[nEdges].gid1   = (PetscInt)PetscRealPart(cpcol_gid[idx[jj]]);
          Edges[nEdges].cpid1  = idx[jj];
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if (nn > 1) n_nz_row++;
      else if (iter == 1) {
        /* should select this because it is technically in the MIS, but let's not */
        ierr = PetscCDRemoveAll(agg_llists, lid);CHKERRQ(ierr);
      }
    }
    ierr = ISRestoreIndices(perm,&perm_ix);CHKERRQ(ierr);

    qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);

    /* projection matrix */
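    /* (added note) P gets a 1 at (gid0,gid0) for each matched root and at
       (gid1,gid0) for its matched partner; the later MatPtAP() then collapses
       each matched pair into a single coarse vertex. */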
    ierr = MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 1, NULL, 1, NULL, &P);CHKERRQ(ierr);

    /* clear matched flags */
    for (kk=0; kk<nloc; kk++) lid_matched[kk] = PETSC_FALSE;
    /* process - communicate - process */
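    /* (added note) two-message protocol per sub-iteration: tag1 carries a rank's
       list of ghost vertices it deleted, [n, rank, n*[gid1,lid0]]; the owner
       replies on tag2 with each deleted vertex's aggregate members,
       [lid0, n, n*[gid]], which are appended to lid0's list below. */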
    for (sub_it=0; sub_it<n_sub_its; sub_it++) {
      PetscInt nactive_edges;

      ierr = VecGetArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);
      for (kk=nactive_edges=0; kk<nEdges; kk++) {
        /* HEM */
        const Edge     *e   = &Edges[kk];
        const PetscInt lid0=e->lid0,gid1=e->gid1,cpid1=e->cpid1,gid0=lid0+my0,lid1=gid1-my0;
        PetscBool      isOK = PETSC_TRUE;

        /* skip if either (local) vertex is done already */
        if (lid_matched[lid0] || (gid1>=my0 && gid1<Iend && lid_matched[gid1-my0])) continue;

        /* skip if ghost vertex is done */
        if (cpid1 != -1 && cpcol_matched[cpid1]) continue;

        nactive_edges++;
        /* skip if I have a bigger edge someplace (lid_max_ew gets updated) */
        if (PetscRealPart(lid_max_ew[lid0]) > e->weight + PETSC_SMALL) continue;

        if (cpid1 == -1) {
          if (PetscRealPart(lid_max_ew[lid1]) > e->weight + PETSC_SMALL) continue;
        } else {
          /* see if edge might get matched on other proc */
          PetscReal g_max_e = PetscRealPart(cpcol_max_ew[cpid1]);
          if (g_max_e > e->weight + PETSC_SMALL) continue;
          else if (e->weight > g_max_e - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[cpid1]) > rank) {
            /* check for max_e == to this edge and larger processor that will deal with this */
            continue;
          }
        }

        /* check ghost for v0 */
        if (isOK) {
          PetscReal max_e,ew;
          if ((ix=lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
            ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap  = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0; jj<n && isOK; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* check for max_e == to this edge and larger processor that will deal with this */
              if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid0]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) {
                isOK = PETSC_FALSE;
              }
            }
          }

          /* for v1 */
          if (cpid1 == -1 && isOK) {
            if ((ix=lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
              ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
              ap  = matB->a + ii[ix];
              idx = matB->j + ii[ix];
              for (jj=0; jj<n && isOK; jj++) {
                PetscInt lidj = idx[jj];
                if (cpcol_matched[lidj]) continue;
                ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
                /* check for max_e == to this edge and larger processor that will deal with this */
                if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid1]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) {
                  isOK = PETSC_FALSE;
                }
              }
            }
          }
        }

        /* do it */
        if (isOK) {
          if (cpid1 == -1) {
            lid_matched[lid1] = PETSC_TRUE;  /* keep track of what we've done this round */
            ierr              = PetscCDAppendRemove(agg_llists, lid0, lid1);CHKERRQ(ierr);
          } else if (sub_it != n_sub_its-1) {
            /* add gid1 to list of ghost deleted by me -- I need their children */
            proc = cpcol_pe[cpid1];

            cpcol_matched[cpid1] = PETSC_TRUE; /* messing with VecGetArray array -- needed??? */

            ierr = PetscCDAppendID(deleted_list, proc, cpid1);CHKERRQ(ierr); /* cache to send messages */
            ierr = PetscCDAppendID(deleted_list, proc, lid0);CHKERRQ(ierr);
          } else continue;

          lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
          /* set projection */
          ierr = MatSetValues(P,1,&gid0,1,&gid0,&one,INSERT_VALUES);CHKERRQ(ierr);
          ierr = MatSetValues(P,1,&gid1,1,&gid0,&one,INSERT_VALUES);CHKERRQ(ierr);
        } /* matched */
      } /* edge loop */

      /* deal with deleted ghost on first pass */
      if (size>1 && sub_it != n_sub_its-1) {
#define REQ_BF_SIZE 100
        PetscCDIntNd *pos;
        PetscBool    ise = PETSC_FALSE;
        PetscInt     nSend1,**sbuffs1,nSend2;
        MPI_Request  *sreqs2[REQ_BF_SIZE],*rreqs2[REQ_BF_SIZE];
        MPI_Status   status;

        /* send request */
        for (proc=0,nSend1=0; proc<size; proc++) {
          ierr = PetscCDEmptyAt(deleted_list,proc,&ise);CHKERRQ(ierr);
          if (!ise) nSend1++;
        }
        ierr = PetscMalloc1(nSend1, &sbuffs1);CHKERRQ(ierr);
        for (proc=0,nSend1=0; proc<size; proc++) {
          /* count ghosts */
          ierr = PetscCDSizeAt(deleted_list,proc,&n);CHKERRQ(ierr);
          if (n>0) {
#define CHUNCK_SIZE 100
            PetscInt    *sbuff,*pt;
            MPI_Request *request;
            n   /= 2;
            ierr = PetscMalloc1(2 + 2*n + n*CHUNCK_SIZE + 2, &sbuff);CHKERRQ(ierr);
            /* save requests */
            sbuffs1[nSend1] = sbuff;
            request         = (MPI_Request*)sbuff;
            sbuff           = pt = (PetscInt*)(request+1);
            *pt++           = n; *pt++ = rank;

            ierr = PetscCDGetHeadPos(deleted_list,proc,&pos);CHKERRQ(ierr);
            while (pos) {
              PetscInt lid0, cpid, gid;
              ierr  = PetscCDIntNdGetID(pos, &cpid);CHKERRQ(ierr);
              gid   = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
              ierr  = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              ierr  = PetscCDIntNdGetID(pos, &lid0);CHKERRQ(ierr);
              ierr  = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              *pt++ = gid; *pt++ = lid0;
            }
            /* send request tag1 [n, proc, n*[gid1,lid0] ] */
            ierr = MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, comm, request);CHKERRQ(ierr);
            /* post receive */
            request        = (MPI_Request*)pt;
            rreqs2[nSend1] = request; /* cache recv request */
            pt             = (PetscInt*)(request+1);
            ierr           = MPI_Irecv(pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, comm, request);CHKERRQ(ierr);
            /* clear list */
            ierr = PetscCDRemoveAll(deleted_list, proc);CHKERRQ(ierr);
            nSend1++;
          }
        }
        /* receive requests, send response, clear lists */
        kk     = nactive_edges;
        ierr   = MPIU_Allreduce(&kk,&nactive_edges,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); /* not a precise synchronization; makes nactive_edges global */
        nSend2 = 0;
        while (1) {
#define BF_SZ 10000
          PetscMPIInt flag,count;
          PetscInt    rbuff[BF_SZ],*pt,*pt2,*pt3,count2,*sbuff,count3;
          MPI_Request *request;

          ierr = MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &status);CHKERRQ(ierr);
          if (!flag) break;
          ierr = MPI_Get_count(&status, MPIU_INT, &count);CHKERRQ(ierr);
          if (count > BF_SZ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for receive: %d",count);
          proc = status.MPI_SOURCE;
          /* receive request tag1 [n, proc, n*[gid1,lid0] ] */
          ierr = MPI_Recv(rbuff, count, MPIU_INT, proc, tag1, comm, &status);CHKERRQ(ierr);
          /* count sends */
          pt = rbuff; count3 = count2 = 0;
          n  = *pt++; kk = *pt++;
          while (n--) {
            PetscInt gid1=*pt++, lid1=gid1-my0; kk=*pt++;
            if (lid_matched[lid1]) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Sub-iteration %D: received deleted gid %D, deleted by (lid) %D",sub_it,gid1,kk);
            lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
            ierr              = PetscCDSizeAt(agg_llists, lid1, &kk);CHKERRQ(ierr);
            count2           += kk + 2;
            count3++; /* number of verts requested (n) */
          }
          if (count2 > count3*CHUNCK_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Irecv will be too small: %D",count2);
          /* send tag2 *[lid0, n, n*[gid] ] */
          ierr             = PetscMalloc(count2*sizeof(PetscInt) + sizeof(MPI_Request), &sbuff);CHKERRQ(ierr);
          request          = (MPI_Request*)sbuff;
          sreqs2[nSend2++] = request; /* cache request */
          if (nSend2==REQ_BF_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for requests: %D",nSend2);
          pt2 = sbuff = (PetscInt*)(request+1);
          pt  = rbuff;
          n   = *pt++; kk = *pt++;
          while (n--) {
            /* read [n, proc, n*[gid1,lid0] */
            PetscInt gid1=*pt++, lid1=gid1-my0, lid0=*pt++;
            /* write [lid0, n, n*[gid] ] */
            *pt2++ = lid0;
            pt3    = pt2++; /* save pointer for later */
            ierr = PetscCDGetHeadPos(agg_llists,lid1,&pos);CHKERRQ(ierr);
            while (pos) {
              PetscInt gid;
              ierr   = PetscCDIntNdGetID(pos, &gid);CHKERRQ(ierr);
              ierr   = PetscCDGetNextPos(agg_llists,lid1,&pos);CHKERRQ(ierr);
              *pt2++ = gid;
            }
            *pt3 = (pt2-pt3)-1;
            /* clear list */
            ierr = PetscCDRemoveAll(agg_llists, lid1);CHKERRQ(ierr);
          }
          /* send requested data tag2 *[lid0, n, n*[gid1] ] */
          ierr = MPI_Isend(sbuff, count2, MPIU_INT, proc, tag2, comm, request);CHKERRQ(ierr);
        }

        /* receive tag2 *[lid0, n, n*[gid] ] */
        for (kk=0; kk<nSend1; kk++) {
          PetscMPIInt count;
          MPI_Request *request;
          PetscInt    *pt, *pt2;

          request = rreqs2[kk]; /* no need to free -- buffer is in 'sbuffs1' */
          ierr    = MPI_Wait(request, &status);CHKERRQ(ierr);
          ierr    = MPI_Get_count(&status, MPIU_INT, &count);CHKERRQ(ierr);
          pt      = pt2 = (PetscInt*)(request+1);
          while (pt-pt2 < count) {
            PetscInt lid0 = *pt++, n = *pt++;
            while (n--) {
              PetscInt gid1 = *pt++;
              ierr = PetscCDAppendID(agg_llists, lid0, gid1);CHKERRQ(ierr);
            }
          }
        }

        /* wait for tag1 isends */
        while (nSend1--) {
          MPI_Request *request;
          request = (MPI_Request*)sbuffs1[nSend1];
          ierr    = MPI_Wait(request, &status);CHKERRQ(ierr);
          ierr    = PetscFree(request);CHKERRQ(ierr);
        }
        ierr = PetscFree(sbuffs1);CHKERRQ(ierr);

        /* wait for tag2 isends */
        while (nSend2--) {
          MPI_Request *request = sreqs2[nSend2];
          ierr = MPI_Wait(request, &status);CHKERRQ(ierr);
          ierr = PetscFree(request);CHKERRQ(ierr);
        }

        ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
        ierr = VecRestoreArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);

        /* get 'cpcol_matched' - use locMaxPE, ghostMaxEdge, cpcol_max_ew */
        for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
          PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
          ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
        }
        ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr);
        ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
        ierr = VecGetLocalSize(mpimat->lvec, &n);CHKERRQ(ierr);
        for (kk=0; kk<n; kk++) {
          cpcol_matched[kk] = (PetscBool)(PetscRealPart(cpcol_max_ew[kk]) != 0.0);
        }
        ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
      } /* size > 1 */

      /* compute 'locMaxEdge' */
      ierr = VecRestoreArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);
      for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
        PetscReal   max_e = 0.,tt;
        PetscScalar vval;
        PetscInt    lid = kk;

        if (lid_matched[lid]) vval = 0.;
        else {
          ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
          ap = matA->a + ii[lid];
          for (jj=0; jj<n; jj++) {
            PetscInt lidj = idx[jj];
            if (lid_matched[lidj]) continue; /* this is new - can change local max */
            if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
          }
          if (lid_cprowID && (ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap  = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0; jj<n; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              if ((tt=PetscRealPart(ap[jj])) > max_e) max_e = tt;
            }
          }
        }
        vval = (PetscScalar)max_e;
        ierr = VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr); /* set with GID */
      }
      ierr = VecAssemblyBegin(locMaxEdge);CHKERRQ(ierr);
      ierr = VecAssemblyEnd(locMaxEdge);CHKERRQ(ierr);

      if (size>1 && sub_it != n_sub_its-1) {
        /* compute 'cpcol_max_ew' */
        ierr = VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
        ierr = VecGetArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);

        /* compute 'cpcol_max_pe' */
        for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
          PetscInt    lid = kk;
          PetscReal   ew,v1_max_e,v0_max_e=PetscRealPart(lid_max_ew[lid]);
          PetscScalar vval;
          PetscMPIInt max_pe=rank,pe;

          if (lid_matched[lid]) vval = (PetscScalar)rank;
          else if ((ix=lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii  = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
            ap  = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj=0; jj<n; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew = PetscRealPart(ap[jj]); v1_max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* get max pe that has a max_e == to this edge w */
              if ((pe=cpcol_pe[idx[jj]]) > max_pe && ew > v1_max_e - PETSC_SMALL && ew > v0_max_e - PETSC_SMALL) max_pe = pe;
            }
            vval = (PetscScalar)max_pe;
          }
          ierr = VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES);CHKERRQ(ierr);
        }
        ierr = VecAssemblyBegin(locMaxPE);CHKERRQ(ierr);
        ierr = VecAssemblyEnd(locMaxPE);CHKERRQ(ierr);

        ierr = VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = VecGetArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);
        ierr = VecRestoreArray(locMaxEdge, &lid_max_ew);CHKERRQ(ierr);
      } /* deal with deleted ghost */
      ierr = PetscInfo3(a_Gmat,"\t %D.%D: %D active edges.\n",iter,sub_it,nactive_edges);CHKERRQ(ierr);
      if (!nactive_edges) break;
    } /* sub_it loop */

    /* clean up iteration */
    ierr = PetscFree(Edges);CHKERRQ(ierr);
    if (mpimat) {
      ierr = VecRestoreArray(ghostMaxEdge, &cpcol_max_ew);CHKERRQ(ierr);
      ierr = VecDestroy(&ghostMaxEdge);CHKERRQ(ierr);
      ierr = VecRestoreArray(ghostMaxPE, &cpcol_max_pe);CHKERRQ(ierr);
      ierr = VecDestroy(&ghostMaxPE);CHKERRQ(ierr);
      ierr = PetscFree(cpcol_pe);CHKERRQ(ierr);
      ierr = PetscFree(cpcol_matched);CHKERRQ(ierr);
    }

    ierr = VecDestroy(&locMaxEdge);CHKERRQ(ierr);
    ierr = VecDestroy(&locMaxPE);CHKERRQ(ierr);

    if (mpimat) {
      ierr = VecRestoreArray(mpimat->lvec, &cpcol_gid);CHKERRQ(ierr);
    }

    /* create next G if needed */
    if (iter == n_iter) { /* hard wired test - need to look at full surrounded nodes or something */
      ierr = MatDestroy(&P);CHKERRQ(ierr);
      ierr = MatDestroy(&cMat);CHKERRQ(ierr);
      break;
    } else {
      Vec diag;
      /* add identity for unmatched vertices so they stay alive */
      for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
        if (!lid_matched[kk]) {
          gid  = kk+my0;
          ierr = MatGetRow(cMat,gid,&n,NULL,NULL);CHKERRQ(ierr);
          if (n>1) {
            ierr = MatSetValues(P,1,&gid,1,&gid,&one,INSERT_VALUES);CHKERRQ(ierr);
          }
          ierr = MatRestoreRow(cMat,gid,&n,NULL,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(P,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(P,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      /* project to make new graph with collapsed edges */
      ierr = MatPtAP(cMat,P,MAT_INITIAL_MATRIX,1.0,&tMat);CHKERRQ(ierr);
      ierr = MatDestroy(&P);CHKERRQ(ierr);
      ierr = MatDestroy(&cMat);CHKERRQ(ierr);
      cMat = tMat;
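      /* (added note) scale the coarsened graph symmetrically by D^{-1/2} so that
         edge weights stay comparable across levels (a Jacobi-like normalization) */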
      ierr = MatCreateVecs(cMat, &diag, NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(cMat, diag);CHKERRQ(ierr); /* effectively PCJACOBI */
      ierr = VecReciprocal(diag);CHKERRQ(ierr);
      ierr = VecSqrtAbs(diag);CHKERRQ(ierr);
      ierr = MatDiagonalScale(cMat, diag, diag);CHKERRQ(ierr);
      ierr = VecDestroy(&diag);CHKERRQ(ierr);
    }
  } /* coarsen iterator */

  /* make fake matrix */
  if (size>1) {
    Mat          mat;
    PetscCDIntNd *pos;
    PetscInt     gid, NN, MM, jj = 0, mxsz = 0;

    for (kk=0; kk<nloc; kk++) {
      ierr = PetscCDSizeAt(agg_llists, kk, &jj);CHKERRQ(ierr);
      if (jj > mxsz) mxsz = jj;
    }
    ierr = MatGetSize(a_Gmat, &MM, &NN);CHKERRQ(ierr);
    if (mxsz > MM-nloc) mxsz = MM-nloc;

    ierr = MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat);CHKERRQ(ierr);

    for (kk=0,gid=my0; kk<nloc; kk++,gid++) {
      /* for (pos=PetscCDGetHeadPos(agg_llists,kk) ; pos ; pos=PetscCDGetNextPos(agg_llists,kk,pos)) { */
      ierr = PetscCDGetHeadPos(agg_llists,kk,&pos);CHKERRQ(ierr);
      while (pos) {
        PetscInt gid1;
        ierr = PetscCDIntNdGetID(pos, &gid1);CHKERRQ(ierr);
        ierr = PetscCDGetNextPos(agg_llists,kk,&pos);CHKERRQ(ierr);

        if (gid1 < my0 || gid1 >= my0+nloc) {
          ierr = MatSetValues(mat,1,&gid,1,&gid1,&one,ADD_VALUES);CHKERRQ(ierr);
        }
      }
    }
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = PetscCDSetMat(agg_llists, mat);CHKERRQ(ierr);
  }

  ierr = PetscFree(lid_cprowID);CHKERRQ(ierr);
  ierr = PetscFree(lid_gid);CHKERRQ(ierr);
  ierr = PetscFree(lid_matched);CHKERRQ(ierr);
  ierr = PetscCDDestroy(deleted_list);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   HEM coarsen, simple greedy.
*/
static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
{
  PetscErrorCode ierr;
  Mat            mat = coarse->graph;

  PetscFunctionBegin;
  if (!coarse->perm) {
    IS       perm;
    PetscInt n,m;

    ierr = MatGetLocalSize(mat, &m, &n);CHKERRQ(ierr);
    ierr = ISCreateStride(PetscObjectComm((PetscObject)mat), m, 0, 1, &perm);CHKERRQ(ierr);
    ierr = heavyEdgeMatchAgg(perm, mat, &coarse->agg_lists);CHKERRQ(ierr);
    ierr = ISDestroy(&perm);CHKERRQ(ierr);
  } else {
    ierr = heavyEdgeMatchAgg(coarse->perm, mat, &coarse->agg_lists);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscBool      iascii;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)coarse),&rank);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] HEM aggregator\n",rank);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   MATCOARSENHEM - A coarsener that uses heavy edge matching (HEM), a simple greedy matching algorithm

   Level: beginner

.seealso: MatCoarsenSetType(), MatCoarsenType, MatCoarsenCreate()

M*/
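
/* A usage sketch (illustrative only; `G` is assumed to be a previously assembled
 * MATAIJ adjacency/graph matrix):
 *
 *   MatCoarsen crs;
 *   ierr = MatCoarsenCreate(PETSC_COMM_WORLD, &crs);CHKERRQ(ierr);
 *   ierr = MatCoarsenSetType(crs, MATCOARSENHEM);CHKERRQ(ierr);
 *   ierr = MatCoarsenSetAdjacency(crs, G);CHKERRQ(ierr);
 *   ierr = MatCoarsenApply(crs);CHKERRQ(ierr);   // invokes MatCoarsenApply_HEM
 *   ierr = MatCoarsenDestroy(&crs);CHKERRQ(ierr);
 */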

PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
{
  PetscFunctionBegin;
  coarse->ops->apply = MatCoarsenApply_HEM;
  coarse->ops->view  = MatCoarsenView_HEM;
  PetscFunctionReturn(0);
}