/*
 * Copyright 1997, Regents of the University of Minnesota
 *
 * comm.c
 *
 * This file provides various high-level communication functions
 *
 * $Id: comm.c 10592 2011-07-16 21:17:53Z karypis $
 */

#include <parmetislib.h>


/*************************************************************************/
/*! This function performs the following tasks:
    - determines the processors that store adjacent vertices and sets up
      the infrastructure for efficient communication;
    - localizes the numbering of the adjacency lists.
*/
/**************************************************************************/
void CommSetup(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, j, k, islocal, penum, gnvtxs, nvtxs, nlocal, firstvtx, lastvtx,
        nsend, nrecv, nnbrs, nadj;
  idx_t npes=ctrl->npes, mype=ctrl->mype;
  idx_t *vtxdist, *xadj, *adjncy;
  idx_t *peind, *recvptr, *recvind, *sendptr, *sendind;
  idx_t *imap, *lperm;
  idx_t *pexadj, *peadjncy, *peadjloc, *startsind;
  ikv_t *recvrequests, *sendrequests, *adjpairs;

  if (graph->lperm != NULL)
    return; /* The communication structure has already been set up */

  STARTTIMER(ctrl, ctrl->SetupTmr);

  gnvtxs = graph->gnvtxs;
  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  lperm  = graph->lperm = iincset(nvtxs, 0, imalloc(nvtxs, "CommSetup: graph->lperm"));

  vtxdist  = graph->vtxdist;
  firstvtx = vtxdist[mype];
  lastvtx  = vtxdist[mype+1];
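
  /* Vertices are distributed in contiguous global-id ranges: PE p owns
     global vertices [vtxdist[p], vtxdist[p+1]).  For example (illustrative
     numbers only), with vtxdist = {0, 4, 8, 10} PE 1 owns global vertices
     4..7.  Ownership below therefore reduces to a range check against
     [firstvtx, lastvtx). */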

  WCOREPUSH;

  /*************************************************************
   * Determine what you need to receive
   *************************************************************/
  /* first pass: determine nadj and the interior/interface vertices */
  for (nlocal=0, nadj=0, i=0; i<nvtxs; i++) {
    islocal = 1;
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (k < firstvtx || k >= lastvtx) { /* remote vertex */
        nadj++;
        islocal = 0;
      }
    }
    if (islocal) {
      lperm[i]        = lperm[nlocal];
      lperm[nlocal++] = i;
    }
  }
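
  /* At this point lperm[0..nlocal-1] lists the interior vertices (those
     with no remote neighbors) and lperm[nlocal..nvtxs-1] the interface
     ones; the swap above keeps lperm a permutation of 0..nvtxs-1. */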
  graph->nlocal = nlocal;

  adjpairs = ikvwspacemalloc(ctrl, nadj+1);

  /* second pass: renumber the local entries and collect the remote edges */
  for (nadj=0, i=0; i<nvtxs; i++) {
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (k >= firstvtx && k < lastvtx) { /* local vertex */
        adjncy[j] = k-firstvtx;
      }
      else { /* remote vertex */
        adjpairs[nadj].key   = k;
        adjpairs[nadj++].val = j;
      }
    }
  }

  /* use a sort-based "unique" approach */
  ikvsorti(nadj, adjpairs);
  adjpairs[nadj].key = gnvtxs+1;  /* sentinel for the boundary condition */
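
  /* The pairs are sorted by global id, so duplicate remote endpoints end up
     in consecutive runs.  The sentinel key gnvtxs+1 is larger than any real
     id, which lets the "key != next key" tests below detect the end of the
     last run without a special case. */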

  /* determine how many distinct vertices you need to receive */
  for (nrecv=0, i=0; i<nadj; i++) {
    if (adjpairs[i].key != adjpairs[i+1].key)
      nrecv++;
  }
  graph->nrecv = nrecv;

  /* allocate space for the to-be-received vertices part of the recvinfo */
  recvind = graph->recvind = imalloc(nrecv, "CommSetup: recvind");

  /* store the distinct vertices into the recvind array and rewrite adjncy */
  for (nrecv=0, i=0; i<nadj; i++) {
    adjncy[adjpairs[i].val] = nvtxs+nrecv;
    if (adjpairs[i].key != adjpairs[i+1].key)
      recvind[nrecv++] = adjpairs[i].key;
  }
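
  /* adjncy now refers to remote endpoints through the local indices
     nvtxs..nvtxs+nrecv-1.  All duplicates of the same global id receive the
     same local index, since nrecv only advances at run boundaries. */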
  PASSERT(ctrl, nrecv == graph->nrecv);

  /* determine the number of neighboring processors */
  for (i=0, nnbrs=0, penum=0; penum<npes; penum++) {
    for (j=i; j<nrecv; j++) {
      if (recvind[j] >= vtxdist[penum+1])
        break;
    }
    if (j > i) {
      nnbrs++;
      i = j;
    }
  }
  graph->nnbrs = nnbrs;

  /* Update the ctrl arrays that have to do with p2p communication */
  CommUpdateNnbrs(ctrl, nnbrs);

  /* allocate space for the peind/recvptr part of the recvinfo */
  peind   = graph->peind   = imalloc(nnbrs, "CommSetup: peind");
  recvptr = graph->recvptr = imalloc(nnbrs+1, "CommSetup: recvptr");

  /* populate the peind/recvptr arrays */
  for (i=0, nnbrs=0, recvptr[0]=0, penum=0; penum<npes; penum++) {
    for (j=i; j<nrecv; j++) {
      if (recvind[j] >= vtxdist[penum+1])
        break;
    }
    if (j > i) {
      peind[nnbrs++] = penum;
      recvptr[nnbrs] = j;
      i = j;
    }
  }
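
  /* Because recvind is sorted and the vtxdist ranges are contiguous, a
     single sweep over the PEs splits recvind into per-neighbor slices:
     peind[i] is the i-th neighboring PE and recvptr[i]..recvptr[i+1]-1
     is the portion of recvind that it will supply. */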
  PASSERT(ctrl, nnbrs == graph->nnbrs);

  WCOREPOP;

  /* PrintVector(ctrl, nnbrs+1, 0, recvptr, "recvptr"); */

  WCOREPUSH;

  /*************************************************************
   * Determine what you need to send
   *************************************************************/
  /* GKTODO - This can be replaced via a sparse communication */
  /* Tell the other processors what they need to send you */
  recvrequests = ikvwspacemalloc(ctrl, npes);
  sendrequests = ikvwspacemalloc(ctrl, npes);
  memset(recvrequests, 0, sizeof(ikv_t)*npes);
  for (i=0; i<nnbrs; i++) {
    recvrequests[peind[i]].key = recvptr[i+1]-recvptr[i];
    recvrequests[peind[i]].val = nvtxs+recvptr[i];
  }
  gkMPI_Alltoall((void *)recvrequests, 2, IDX_T, (void *)sendrequests,
      2, IDX_T, ctrl->comm);
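
  /* Each ikv_t travels as two IDX_T words.  recvrequests[p].key tells PE p
     how many of its vertices I need; .val records where their values will
     land in my local numbering (nvtxs+recvptr[i]), which PE p keeps as
     startsind below and bakes into peadjloc for the sparse exchanges.
     After the all-to-all, sendrequests[p] holds what PE p needs from me. */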

  /* PrintPairs(ctrl, npes, recvrequests, "recvrequests"); */
  /* PrintPairs(ctrl, npes, sendrequests, "sendrequests"); */

  startsind = iwspacemalloc(ctrl, nnbrs);
  sendptr   = graph->sendptr = imalloc(nnbrs+1, "CommSetup: sendptr");

  for (j=0, i=0; i<npes; i++) {
    if (sendrequests[i].key > 0) {
      sendptr[j]   = sendrequests[i].key;
      startsind[j] = sendrequests[i].val;
      j++;
    }
  }
  PASSERT(ctrl, j == nnbrs);

  MAKECSR(i, nnbrs, sendptr);
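
  /* MAKECSR turns the per-neighbor counts stored in sendptr[0..nnbrs-1]
     into CSR-style starting offsets.  For instance, counts {3, 1, 2}
     become offsets {0, 3, 4, 6}, so sendptr[nnbrs] is the total number of
     values to send. */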

  nsend   = graph->nsend   = sendptr[nnbrs];
  sendind = graph->sendind = imalloc(nsend, "CommSetup: sendind");

  /* Issue the receives for sendind */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Irecv((void *)(sendind+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_T,
        peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  /* Issue the sends. My recvind[penum] becomes penum's sendind[mype] */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Isend((void *)(recvind+recvptr[i]), recvptr[i+1]-recvptr[i], IDX_T,
        peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }

  gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
  gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);

  /* Create the peadjncy data structure for sparse boundary exchanges */
  pexadj   = graph->pexadj   = ismalloc(nvtxs+1, 0, "CommSetup: pexadj");
  peadjncy = graph->peadjncy = imalloc(nsend, "CommSetup: peadjncy");
  peadjloc = graph->peadjloc = imalloc(nsend, "CommSetup: peadjloc");

  for (i=0; i<nsend; i++) {
    PASSERTP(ctrl, sendind[i] >= firstvtx && sendind[i] < lastvtx,
        (ctrl, "%"PRIDX" %"PRIDX" %"PRIDX"\n", sendind[i], firstvtx, lastvtx));
    pexadj[sendind[i]-firstvtx]++;
  }
  MAKECSR(i, nvtxs, pexadj);

  for (i=0; i<nnbrs; i++) {
    for (j=sendptr[i]; j<sendptr[i+1]; j++) {
      k = pexadj[sendind[j]-firstvtx]++;
      peadjncy[k] = i;  /* peind[i] is the actual PE number */
      peadjloc[k] = startsind[i]++;
    }
  }
  PASSERT(ctrl, pexadj[nvtxs] == nsend);

  SHIFTCSR(i, nvtxs, pexadj);
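
  /* The loop above used pexadj[v] as an insertion cursor (a counting-sort
     idiom), which left every offset shifted one slot forward; SHIFTCSR
     shifts the array back so that pexadj is again a valid CSR structure.
     Afterwards, pexadj[v]..pexadj[v+1]-1 indexes, for each local vertex v,
     the neighbor PEs that need v's value (peadjncy) and the slot in their
     receive buffers where it should land (peadjloc). */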

  WCOREPOP;

  /* Create the inverse map from ladjncy to adjncy */
  imap = graph->imap = imalloc(nvtxs+nrecv, "CommSetup: imap");
  for (i=0; i<nvtxs; i++)
    imap[i] = firstvtx+i;
  for (i=0; i<nrecv; i++)
    imap[nvtxs+i] = recvind[i];

  STOPTIMER(ctrl, ctrl->SetupTmr);

#ifdef DEBUG_SETUPINFO
  rprintf(ctrl, "[%5"PRIDX" %5"PRIDX"] \tl:[%5"PRIDX" %5"PRIDX"] \ts:[%5"PRIDX", %5"PRIDX"] \tr:[%5"PRIDX", %5"PRIDX"]\n",
      GlobalSEMin(ctrl, nvtxs), GlobalSEMax(ctrl, nvtxs),
      GlobalSEMin(ctrl, nlocal), GlobalSEMax(ctrl, nlocal),
      GlobalSEMin(ctrl, nsend), GlobalSEMax(ctrl, nsend),
      GlobalSEMin(ctrl, nrecv), GlobalSEMax(ctrl, nrecv));

  PrintSetUpInfo(ctrl, graph);
#endif
}
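

/* A minimal usage sketch (hypothetical caller; the ctrl/graph pair is
   assumed to have been constructed elsewhere):

     CommSetup(ctrl, graph);

     // exchange one idx_t per interface vertex; with an array data[] of
     // size nvtxs+graph->nrecv, the copies received from the neighbors
     // land in data[nvtxs..nvtxs+nrecv-1], matching the local numbering
     // that CommSetup wrote into adjncy
     CommInterfaceData(ctrl, graph, data, data+graph->nvtxs);
*/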


/*************************************************************************/
/*! This function updates the sreq/rreq/statuses arrays in ctrl based on
    the new number of neighbors.
*/
/*************************************************************************/
void CommUpdateNnbrs(ctrl_t *ctrl, idx_t nnbrs)
{
  if (ctrl->ncommpes >= nnbrs)
    return;
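
  /* grow to the new high-water mark; the arrays are never shrunk, so
     repeated CommSetup calls reuse the largest allocation seen so far */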

  ctrl->ncommpes = nnbrs;
  ctrl->sreq     = (MPI_Request *)gk_realloc(ctrl->sreq, sizeof(MPI_Request)*nnbrs, "sreq");
  ctrl->rreq     = (MPI_Request *)gk_realloc(ctrl->rreq, sizeof(MPI_Request)*nnbrs, "rreq");
  ctrl->statuses = (MPI_Status *)gk_realloc(ctrl->statuses, sizeof(MPI_Status)*nnbrs, "statuses");
}


/*************************************************************************/
/*! This function performs the gather/scatter for the boundary vertices.
*/
/*************************************************************************/
void CommInterfaceData(ctrl_t *ctrl, graph_t *graph, idx_t *data,
         idx_t *recvvector)
{
  idx_t i, k, nnbrs, firstvtx;
  idx_t *peind, *sendptr, *sendind, *sendvector, *recvptr, *recvind;

  WCOREPUSH;

  firstvtx = graph->vtxdist[ctrl->mype];
  nnbrs    = graph->nnbrs;
  peind    = graph->peind;
  sendptr  = graph->sendptr;
  sendind  = graph->sendind;
  recvptr  = graph->recvptr;
  recvind  = graph->recvind;

  /* Issue the receives first */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Irecv((void *)(recvvector+recvptr[i]), recvptr[i+1]-recvptr[i], IDX_T,
        peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  /* Issue the sends next */
  k = sendptr[nnbrs];
  sendvector = iwspacemalloc(ctrl, k);
  for (i=0; i<k; i++)
    sendvector[i] = data[sendind[i]-firstvtx];
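
  /* sendvector is packed in sendind order, so the values destined for
     neighbor peind[i] form the contiguous slice
     sendvector[sendptr[i]..sendptr[i+1]-1]. */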

  for (i=0; i<nnbrs; i++) {
    gkMPI_Isend((void *)(sendvector+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_T,
        peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }

  /* OK, now get into the loop waiting for the operations to finish */
  gkMPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
  gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);

  WCOREPOP;
}


/*************************************************************************
* This function performs the gather/scatter for the boundary vertices
* whose values have changed
**************************************************************************/
void CommChangedInterfaceData(ctrl_t *ctrl, graph_t *graph, idx_t nchanged,
         idx_t *changed, idx_t *data, ikv_t *sendpairs, ikv_t *recvpairs)
{
  idx_t i, j, k, nnbrs, firstvtx, nrecv, penum, nreceived;
  idx_t *peind, *sendptr, *recvptr, *recvind, *pexadj, *peadjncy,
        *peadjloc, *psendptr;
  ikv_t *pairs;

  firstvtx = graph->vtxdist[ctrl->mype];
  nnbrs    = graph->nnbrs;
  nrecv    = graph->nrecv;
  peind    = graph->peind;
  sendptr  = graph->sendptr;
  recvptr  = graph->recvptr;
  recvind  = graph->recvind;
  pexadj   = graph->pexadj;
  peadjncy = graph->peadjncy;
  peadjloc = graph->peadjloc;

  /* Issue the receives first */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Irecv((void *)(recvpairs+recvptr[i]), 2*(recvptr[i+1]-recvptr[i]), IDX_T,
        peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  if (nchanged != 0) {
    WCOREPUSH;

    psendptr = icopy(nnbrs, sendptr, iwspacemalloc(ctrl, nnbrs));

    /* Copy the changed values into the sendvector */
    for (i=0; i<nchanged; i++) {
      j = changed[i];
      for (k=pexadj[j]; k<pexadj[j+1]; k++) {
        penum = peadjncy[k];
        sendpairs[psendptr[penum]].key = peadjloc[k];
        sendpairs[psendptr[penum]].val = data[j];
        psendptr[penum]++;
      }
    }
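
    /* Only the changed vertices are shipped, as (location, value) pairs:
       peadjloc[k], computed in CommSetup, is already the index in the
       receiving PE's buffer, so the receiver can scatter each pair
       directly without any lookup. */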

    for (i=0; i<nnbrs; i++) {
      gkMPI_Isend((void *)(sendpairs+sendptr[i]), 2*(psendptr[i]-sendptr[i]), IDX_T,
          peind[i], 1, ctrl->comm, ctrl->sreq+i);
    }

    WCOREPOP;
  }
  else {
    for (i=0; i<nnbrs; i++)
      gkMPI_Isend((void *)(sendpairs), 0, IDX_T, peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }

  /* OK, now get into the loop waiting for the operations to finish */
  for (i=0; i<nnbrs; i++) {
    gkMPI_Wait(ctrl->rreq+i, &(ctrl->status));
    gkMPI_Get_count(&ctrl->status, IDX_T, &nreceived);
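
    /* nreceived counts IDX_T words; each changed vertex contributes one
       (key, val) pair, i.e., two words, and key is a ready-to-use local
       index into data[] thanks to peadjloc. */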
    if (nreceived != 0) {
      nreceived = nreceived/2;
      pairs     = recvpairs + graph->recvptr[i];
      for (k=0; k<nreceived; k++)
        data[pairs[k].key] = pairs[k].val;
    }
  }

  gkMPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);
}


/*************************************************************************
* This function computes the max of a single element
**************************************************************************/
idx_t GlobalSEMax(ctrl_t *ctrl, idx_t value)
{
  idx_t max;

  gkMPI_Allreduce((void *)&value, (void *)&max, 1, IDX_T, MPI_MAX, ctrl->comm);

  return max;
}

/*************************************************************************
* This function computes the max of a single element over an arbitrary
* communicator
**************************************************************************/
idx_t GlobalSEMaxComm(MPI_Comm comm, idx_t value)
{
  idx_t max;

  gkMPI_Allreduce((void *)&value, (void *)&max, 1, IDX_T, MPI_MAX, comm);

  return max;
}

/*************************************************************************
* This function computes the min of a single element
**************************************************************************/
idx_t GlobalSEMin(ctrl_t *ctrl, idx_t value)
{
  idx_t min;

  gkMPI_Allreduce((void *)&value, (void *)&min, 1, IDX_T, MPI_MIN, ctrl->comm);

  return min;
}

/*************************************************************************
* This function computes the min of a single element over an arbitrary
* communicator
**************************************************************************/
idx_t GlobalSEMinComm(MPI_Comm comm, idx_t value)
{
  idx_t min;

  gkMPI_Allreduce((void *)&value, (void *)&min, 1, IDX_T, MPI_MIN, comm);

  return min;
}


/*************************************************************************
* This function computes the sum of a single element
**************************************************************************/
idx_t GlobalSESum(ctrl_t *ctrl, idx_t value)
{
  idx_t sum;

  gkMPI_Allreduce((void *)&value, (void *)&sum, 1, IDX_T, MPI_SUM, ctrl->comm);

  return sum;
}

/*************************************************************************
* This function computes the sum of a single element over an arbitrary
* communicator
**************************************************************************/
idx_t GlobalSESumComm(MPI_Comm comm, idx_t value)
{
  idx_t sum;

  gkMPI_Allreduce((void *)&value, (void *)&sum, 1, IDX_T, MPI_SUM, comm);

  return sum;
}


/*************************************************************************
* This function computes the max of a single floating-point element
**************************************************************************/
real_t GlobalSEMaxFloat(ctrl_t *ctrl, real_t value)
{
  real_t max;

  gkMPI_Allreduce((void *)&value, (void *)&max, 1, REAL_T, MPI_MAX, ctrl->comm);

  return max;
}


/*************************************************************************
* This function computes the min of a single floating-point element
**************************************************************************/
real_t GlobalSEMinFloat(ctrl_t *ctrl, real_t value)
{
  real_t min;

  gkMPI_Allreduce((void *)&value, (void *)&min, 1, REAL_T, MPI_MIN, ctrl->comm);

  return min;
}

/*************************************************************************
* This function computes the sum of a single floating-point element
**************************************************************************/
real_t GlobalSESumFloat(ctrl_t *ctrl, real_t value)
{
  real_t sum;

  gkMPI_Allreduce((void *)&value, (void *)&sum, 1, REAL_T, MPI_SUM, ctrl->comm);

  return sum;
}