/*
 * Copyright 1997, Regents of the University of Minnesota
 *
 * comm.c
 *
 * This file provides various high-level communication functions
 *
 * $Id: comm.c 10592 2011-07-16 21:17:53Z karypis $
 */

#include <parmetislib.h>


/*************************************************************************/
/*! This function performs the following tasks:
    - determines the processors that contain adjacent vertices and sets up
      the infrastructure for efficient communication;
    - localizes the numbering of the adjacency lists.
*/
/**************************************************************************/
void CommSetup(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, j, k, islocal, penum, gnvtxs, nvtxs, nlocal, firstvtx, lastvtx,
        nsend, nrecv, nnbrs, nadj;
  idx_t npes=ctrl->npes, mype=ctrl->mype;
  idx_t *vtxdist, *xadj, *adjncy;
  idx_t *peind, *recvptr, *recvind, *sendptr, *sendind;
  idx_t *imap, *lperm;
  idx_t *pexadj, *peadjncy, *peadjloc, *startsind;
  ikv_t *recvrequests, *sendrequests, *adjpairs;

  if (graph->lperm != NULL)
    return; /* The communication structure has already been set up */

  STARTTIMER(ctrl, ctrl->SetupTmr);

  gnvtxs  = graph->gnvtxs;
  nvtxs   = graph->nvtxs;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;
  lperm   = graph->lperm = iincset(nvtxs, 0, imalloc(nvtxs, "CommSetup: graph->lperm"));

  vtxdist  = graph->vtxdist;
  firstvtx = vtxdist[mype];
  lastvtx  = vtxdist[mype+1];

  WCOREPUSH;
  /*************************************************************
   * Determine what you need to receive
   *************************************************************/
  /* first pass: determine nadj and interior/interface vertices */
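  /* (interior vertices, i.e., those whose neighbors all reside on this PE,
     are swapped to the front of lperm, so after this loop lperm[0..nlocal-1]
     holds the interior vertices and lperm[nlocal..nvtxs-1] the interface
     ones) */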
  for (nlocal=0, nadj=0, i=0; i<nvtxs; i++) {
    islocal = 1;
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (k < firstvtx || k >= lastvtx) { /* remote vertex */
        nadj++;
        islocal = 0;
      }
    }
    if (islocal) {
      lperm[i]        = lperm[nlocal];
      lperm[nlocal++] = i;
    }
  }
  graph->nlocal = nlocal;

  adjpairs = ikvwspacemalloc(ctrl, nadj+1);

  /* second pass: rewrite the local entries and record the remote edges */
  for (nadj=0, i=0; i<nvtxs; i++) {
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (k >= firstvtx && k < lastvtx) { /* local vertex */
        adjncy[j] = k-firstvtx;
      }
      else { /* remote vertex */
        adjpairs[nadj].key   = k;
        adjpairs[nadj++].val = j;
      }
    }
  }

  /* use a sort-based "unique" approach */
  ikvsorti(nadj, adjpairs);
  adjpairs[nadj].key = gnvtxs+1;  /* boundary condition */
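  /* (e.g., remote keys {12, 7, 12, 3} sort to {3, 7, 12, 12}; a key is
     counted as distinct whenever it differs from its successor, and the
     gnvtxs+1 sentinel ensures that the last run is also counted) */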

  /* determine how many distinct vertices you need to receive */
  for (nrecv=0, i=0; i<nadj; i++) {
    if (adjpairs[i].key != adjpairs[i+1].key)
      nrecv++;
  }
  graph->nrecv = nrecv;


  /* allocate space for the to-be-received vertices, part of the recvinfo */
  recvind = graph->recvind = imalloc(nrecv, "CommSetup: recvind");

  /* store distinct vertices into recvind array and re-write adjncy */
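  /* (each remote endpoint k in adjncy is replaced by its local index
     nvtxs+r, where r is the rank of k in the sorted recvind array) */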
  for (nrecv=0, i=0; i<nadj; i++) {
    adjncy[adjpairs[i].val] = nvtxs+nrecv;
    if (adjpairs[i].key != adjpairs[i+1].key)
      recvind[nrecv++] = adjpairs[i].key;
  }
  PASSERT(ctrl, nrecv == graph->nrecv);


  /* determine the number of neighboring processors */
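  /* (recvind is sorted by global vertex number and each PE owns the
     contiguous range [vtxdist[penum], vtxdist[penum+1]), so a single sweep
     over recvind groups the requested vertices by their owning PE) */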
  for (i=0, nnbrs=0, penum=0; penum<npes; penum++) {
    for (j=i; j<nrecv; j++) {
      if (recvind[j] >= vtxdist[penum+1])
        break;
    }
    if (j > i) {
      nnbrs++;
      i = j;
    }
  }
  graph->nnbrs = nnbrs;

  /* Update the ctrl arrays that have to do with p2p communication */
  CommUpdateNnbrs(ctrl, nnbrs);

  /* allocate space for peind/recvptr part of the recvinfo */
  peind   = graph->peind   = imalloc(nnbrs, "CommSetup: peind");
  recvptr = graph->recvptr = imalloc(nnbrs+1, "CommSetup: recvptr");
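  /* (peind[i] will be the i-th neighboring PE, and recvptr is a CSR-style
     pointer: recvind[recvptr[i]..recvptr[i+1]-1] are the vertices that will
     be received from peind[i]) */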

  /* populate the peind/recvptr arrays */
  for (i=0, nnbrs=0, recvptr[0]=0, penum=0; penum<npes; penum++) {
    for (j=i; j<nrecv; j++) {
      if (recvind[j] >= vtxdist[penum+1])
        break;
    }
    if (j > i) {
      peind[nnbrs++] = penum;
      recvptr[nnbrs] = j;
      i = j;
    }
  }
  PASSERT(ctrl, nnbrs == graph->nnbrs);

  WCOREPOP;

  /* PrintVector(ctrl, nnbrs+1, 0, recvptr, "recvptr"); */


  WCOREPUSH;
  /*************************************************************
   * Determine what you need to send
   *************************************************************/
  /* GKTODO - This can be replaced via a sparse communication */
  /* Tell the other processors what they need to send you */
  recvrequests = ikvwspacemalloc(ctrl, npes);
  sendrequests = ikvwspacemalloc(ctrl, npes);
  memset(recvrequests, 0, sizeof(ikv_t)*npes);
  for (i=0; i<nnbrs; i++) {
    recvrequests[peind[i]].key = recvptr[i+1]-recvptr[i];
    recvrequests[peind[i]].val = nvtxs+recvptr[i];
  }
  gkMPI_Alltoall((void *)recvrequests, 2, IDX_T, (void *)sendrequests,
      2, IDX_T, ctrl->comm);
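  /* (recvrequests[p].key is the number of vertices this PE needs from PE p
     and .val is the local location, nvtxs+recvptr[i], where they will be
     stored; after the all-to-all, sendrequests[p] holds the corresponding
     request that PE p made of this PE) */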

  /* PrintPairs(ctrl, npes, recvrequests, "recvrequests"); */
  /* PrintPairs(ctrl, npes, sendrequests, "sendrequests"); */

  startsind = iwspacemalloc(ctrl, nnbrs);
  sendptr   = graph->sendptr = imalloc(nnbrs+1, "CommSetup: sendptr");

  for (j=0, i=0; i<npes; i++) {
    if (sendrequests[i].key > 0) {
      sendptr[j]   = sendrequests[i].key;
      startsind[j] = sendrequests[i].val;
      j++;
    }
  }
  PASSERT(ctrl, j == nnbrs);

  MAKECSR(i, nnbrs, sendptr);
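  /* (MAKECSR turns the per-neighbor send counts stored in sendptr into
     CSR-style offsets; e.g., counts {3, 1, 2} become offsets {0, 3, 4, 6}) */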

  nsend   = graph->nsend   = sendptr[nnbrs];
  sendind = graph->sendind = imalloc(nsend, "CommSetup: sendind");


  /* Issue the receives for sendind */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Irecv((void *)(sendind+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_T,
              peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  /* Issue the sends. My recvind[penum] becomes penum's sendind[mype] */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Isend((void *)(recvind+recvptr[i]), recvptr[i+1]-recvptr[i], IDX_T,
              peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }

  gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
  gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);


  /* Create the peadjncy data structure for sparse boundary exchanges */
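  /* (pexadj is a CSR pointer over the local vertices: for local vertex v,
     peadjncy[pexadj[v]..pexadj[v+1]-1] lists, as indices into peind, the
     neighboring PEs that have a copy of v, and peadjloc[] records the slot
     in the remote PE's receive buffer where v's value should be placed) */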
  pexadj   = graph->pexadj   = ismalloc(nvtxs+1, 0, "CommSetup: pexadj");
  peadjncy = graph->peadjncy = imalloc(nsend, "CommSetup: peadjncy");
  peadjloc = graph->peadjloc = imalloc(nsend, "CommSetup: peadjloc");

  for (i=0; i<nsend; i++) {
    PASSERTP(ctrl, sendind[i] >= firstvtx && sendind[i] < lastvtx,
            (ctrl, "%"PRIDX" %"PRIDX" %"PRIDX"\n", sendind[i], firstvtx, lastvtx));
    pexadj[sendind[i]-firstvtx]++;
  }
  MAKECSR(i, nvtxs, pexadj);

  for (i=0; i<nnbrs; i++) {
    for (j=sendptr[i]; j<sendptr[i+1]; j++) {
      k = pexadj[sendind[j]-firstvtx]++;
      peadjncy[k] = i;  /* peind[i] is the actual PE number */
      peadjloc[k] = startsind[i]++;
    }
  }
  PASSERT(ctrl, pexadj[nvtxs] == nsend);

  SHIFTCSR(i, nvtxs, pexadj);
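  /* (the fill loop above advanced each pexadj[v] past v's own entries;
     SHIFTCSR shifts the pointers back by one slot to restore the CSR
     structure) */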

  WCOREPOP;

  /* Create the inverse map from ladjncy to adjncy */
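  /* (imap[i] is the global number of local index i: indices 0..nvtxs-1 are
     the locally owned vertices and nvtxs..nvtxs+nrecv-1 are the received
     interface copies) */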
  imap = graph->imap = imalloc(nvtxs+nrecv, "CommSetup: imap");
  for (i=0; i<nvtxs; i++)
    imap[i] = firstvtx+i;
  for (i=0; i<nrecv; i++)
    imap[nvtxs+i] = recvind[i];

  STOPTIMER(ctrl, ctrl->SetupTmr);

#ifdef DEBUG_SETUPINFO
  rprintf(ctrl, "[%5"PRIDX" %5"PRIDX"] \tl:[%5"PRIDX" %5"PRIDX"] \ts:[%5"PRIDX", %5"PRIDX"] \tr:[%5"PRIDX", %5"PRIDX"]\n",
            GlobalSEMin(ctrl, nvtxs), GlobalSEMax(ctrl, nvtxs),
            GlobalSEMin(ctrl, nlocal), GlobalSEMax(ctrl, nlocal),
            GlobalSEMin(ctrl, nsend), GlobalSEMax(ctrl, nsend),
            GlobalSEMin(ctrl, nrecv), GlobalSEMax(ctrl, nrecv));

  PrintSetUpInfo(ctrl, graph);
#endif

}


/*************************************************************************/
/*! This function updates the sreq/rreq/statuses arrays in ctrl based on
    the new number of neighbors.
*/
/*************************************************************************/
void CommUpdateNnbrs(ctrl_t *ctrl, idx_t nnbrs)
{
  if (ctrl->ncommpes >= nnbrs)
    return;

  ctrl->ncommpes = nnbrs;
  ctrl->sreq = (MPI_Request *)gk_realloc(ctrl->sreq, sizeof(MPI_Request)*nnbrs, "sreq");
  ctrl->rreq = (MPI_Request *)gk_realloc(ctrl->rreq, sizeof(MPI_Request)*nnbrs, "rreq");
  ctrl->statuses = (MPI_Status *)gk_realloc(ctrl->statuses, sizeof(MPI_Status)*nnbrs, "statuses");

}


/*************************************************************************/
/*! This function performs the gather/scatter for the boundary vertices
*/
/*************************************************************************/
void CommInterfaceData(ctrl_t *ctrl, graph_t *graph, idx_t *data,
         idx_t *recvvector)
{
  idx_t i, k, nnbrs, firstvtx;
  idx_t *peind, *sendptr, *sendind, *sendvector, *recvptr, *recvind;

  WCOREPUSH;

  firstvtx = graph->vtxdist[ctrl->mype];
  nnbrs    = graph->nnbrs;
  peind    = graph->peind;
  sendptr  = graph->sendptr;
  sendind  = graph->sendind;
  recvptr  = graph->recvptr;
  recvind  = graph->recvind;

  /* Issue the receives first */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Irecv((void *)(recvvector+recvptr[i]), recvptr[i+1]-recvptr[i], IDX_T,
              peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  /* Issue the sends next */
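  /* (gather the values of the owned vertices that the neighbors requested;
     sendind holds their global numbers, so subtracting firstvtx converts
     them to local indices into data) */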
  k = sendptr[nnbrs];
  sendvector = iwspacemalloc(ctrl, k);
  for (i=0; i<k; i++)
    sendvector[i] = data[sendind[i]-firstvtx];

  for (i=0; i<nnbrs; i++) {
    gkMPI_Isend((void *)(sendvector+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_T,
              peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }

  /* OK, now get into the loop waiting for the operations to finish */
  gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
  gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);

  WCOREPOP;
}
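
/* A minimal usage sketch (illustrative only; the surrounding setup and the
   per-vertex array `where' are assumptions, not taken from this file):

     CommSetup(ctrl, graph);

     // exchange a per-vertex idx_t array that is laid out with the nvtxs
     // owned entries first, followed by room for the nrecv interface copies
     CommInterfaceData(ctrl, graph, where, where+graph->nvtxs);

   After the call, where[graph->nvtxs+i] holds the value of the interface
   vertex whose global number is graph->recvind[i]. */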



/*************************************************************************
* This function performs the gather/scatter of the values of only those
* interface (boundary) vertices whose values have changed
**************************************************************************/
void CommChangedInterfaceData(ctrl_t *ctrl, graph_t *graph, idx_t nchanged,
         idx_t *changed, idx_t *data, ikv_t *sendpairs, ikv_t *recvpairs)
{
  idx_t i, j, k, nnbrs, firstvtx, nrecv, penum, nreceived;
  idx_t *peind, *sendptr, *recvptr, *recvind, *pexadj, *peadjncy,
        *peadjloc, *psendptr;
  ikv_t *pairs;


  firstvtx = graph->vtxdist[ctrl->mype];
  nnbrs    = graph->nnbrs;
  nrecv    = graph->nrecv;
  peind    = graph->peind;
  sendptr  = graph->sendptr;
  recvptr  = graph->recvptr;
  recvind  = graph->recvind;
  pexadj   = graph->pexadj;
  peadjncy = graph->peadjncy;
  peadjloc = graph->peadjloc;

  /* Issue the receives first */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Irecv((void *)(recvpairs+recvptr[i]), 2*(recvptr[i+1]-recvptr[i]), IDX_T,
              peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  if (nchanged != 0) {
    WCOREPUSH;

    psendptr = icopy(nnbrs, sendptr, iwspacemalloc(ctrl, nnbrs));

    /* Copy the changed values into the send buffers (sendpairs) */
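    /* (each changed vertex j is appended, as a (remote-buffer slot, new
       value) pair, to the send buffer of every neighboring PE that has a
       copy of j; psendptr[] tracks the current fill position per PE) */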
    for (i=0; i<nchanged; i++) {
      j = changed[i];
      for (k=pexadj[j]; k<pexadj[j+1]; k++) {
        penum = peadjncy[k];
        sendpairs[psendptr[penum]].key = peadjloc[k];
        sendpairs[psendptr[penum]].val = data[j];
        psendptr[penum]++;
      }
    }

    for (i=0; i<nnbrs; i++) {
      gkMPI_Isend((void *)(sendpairs+sendptr[i]), 2*(psendptr[i]-sendptr[i]), IDX_T,
                peind[i], 1, ctrl->comm, ctrl->sreq+i);
    }

    WCOREPOP;
  }
  else {
    for (i=0; i<nnbrs; i++)
      gkMPI_Isend((void *)(sendpairs), 0, IDX_T, peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }


  /* OK, now get into the loop waiting for the operations to finish */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Wait(ctrl->rreq+i, &(ctrl->status));
    gkMPI_Get_count(&ctrl->status, IDX_T, &nreceived);
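    /* (nreceived counts idx_t words; each ikv_t pair contributed two, so
       the number of (slot, value) pairs received is nreceived/2) */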
    if (nreceived != 0) {
      nreceived = nreceived/2;
      pairs     = recvpairs+graph->recvptr[i];
      for (k=0; k<nreceived; k++)
        data[pairs[k].key] = pairs[k].val;
    }
  }

  gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);
}



/*************************************************************************
* This function computes the max of a single element
**************************************************************************/
idx_t GlobalSEMax(ctrl_t *ctrl, idx_t value)
{
  idx_t max;

  gkMPI_Allreduce((void *)&value, (void *)&max, 1, IDX_T, MPI_MAX, ctrl->comm);

  return max;
}

/*************************************************************************
* This function computes the max of a single element
**************************************************************************/
idx_t GlobalSEMaxComm(MPI_Comm comm, idx_t value)
{
  idx_t max;

  gkMPI_Allreduce((void *)&value, (void *)&max, 1, IDX_T, MPI_MAX, comm);

  return max;
}

/*************************************************************************
* This function computes the min of a single element
**************************************************************************/
idx_t GlobalSEMin(ctrl_t *ctrl, idx_t value)
{
  idx_t min;

  gkMPI_Allreduce((void *)&value, (void *)&min, 1, IDX_T, MPI_MIN, ctrl->comm);

  return min;
}

/*************************************************************************
* This function computes the min of a single element
**************************************************************************/
idx_t GlobalSEMinComm(MPI_Comm comm, idx_t value)
{
  idx_t min;

  gkMPI_Allreduce((void *)&value, (void *)&min, 1, IDX_T, MPI_MIN, comm);

  return min;
}


/*************************************************************************
* This function computes the sum of a single element
**************************************************************************/
idx_t GlobalSESum(ctrl_t *ctrl, idx_t value)
{
  idx_t sum;

  gkMPI_Allreduce((void *)&value, (void *)&sum, 1, IDX_T, MPI_SUM, ctrl->comm);

  return sum;
}

/*************************************************************************
* This function computes the sum of a single element
**************************************************************************/
idx_t GlobalSESumComm(MPI_Comm comm, idx_t value)
{
  idx_t sum;

  gkMPI_Allreduce((void *)&value, (void *)&sum, 1, IDX_T, MPI_SUM, comm);

  return sum;
}



/*************************************************************************
* This function computes the max of a single element
**************************************************************************/
real_t GlobalSEMaxFloat(ctrl_t *ctrl, real_t value)
{
  real_t max;

  gkMPI_Allreduce((void *)&value, (void *)&max, 1, REAL_T, MPI_MAX, ctrl->comm);

  return max;
}



/*************************************************************************
* This function computes the min of a single element
**************************************************************************/
real_t GlobalSEMinFloat(ctrl_t *ctrl, real_t value)
{
  real_t min;

  gkMPI_Allreduce((void *)&value, (void *)&min, 1, REAL_T, MPI_MIN, ctrl->comm);

  return min;
}

/*************************************************************************
* This function computes the sum of a single element
**************************************************************************/
real_t GlobalSESumFloat(ctrl_t *ctrl, real_t value)
{
  real_t sum;

  gkMPI_Allreduce((void *)&value, (void *)&sum, 1, REAL_T, MPI_SUM, ctrl->comm);

  return sum;
}
