1 /* Copyright 2007-2011,2014 IPB, Universite de Bordeaux, INRIA & CNRS
2 **
3 ** This file is part of the Scotch software package for static mapping,
4 ** graph partitioning and sparse matrix ordering.
5 **
6 ** This software is governed by the CeCILL-C license under French law
7 ** and abiding by the rules of distribution of free software. You can
8 ** use, modify and/or redistribute the software under the terms of the
9 ** CeCILL-C license as circulated by CEA, CNRS and INRIA at the following
10 ** URL: "http://www.cecill.info".
11 **
12 ** As a counterpart to the access to the source code and rights to copy,
13 ** modify and redistribute granted by the license, users are provided
14 ** only with a limited warranty and the software's author, the holder of
15 ** the economic rights, and the successive licensors have only limited
16 ** liability.
17 **
18 ** In this respect, the user's attention is drawn to the risks associated
19 ** with loading, using, modifying and/or developing or reproducing the
20 ** software by the user in light of its specific status of free software,
21 ** that may mean that it is complicated to manipulate, and that also
22 ** therefore means that it is reserved for developers and experienced
23 ** professionals having in-depth computer knowledge. Users are therefore
24 ** encouraged to load and test the software's suitability as regards
25 ** their requirements in conditions enabling the security of their
26 ** systems and/or data to be ensured and, more generally, to use and
27 ** operate it in the same conditions as regards security.
28 **
29 ** The fact that you are presently reading this means that you have had
30 ** knowledge of the CeCILL-C license and that you accept its terms.
31 */
32 /************************************************************/
33 /** **/
34 /** NAME : dgraph_fold.c **/
35 /** **/
36 /** AUTHOR : Francois PELLEGRINI **/
37 /** **/
38 /** FUNCTION : This module handles the distributed **/
39 /** source graph folding function. **/
40 /** **/
41 /** DATES : # Version 5.0 : from : 10 aug 2006 **/
42 /** to : 27 jun 2008 **/
43 /** # Version 5.1 : from : 12 nov 2008 **/
44 /** to : 04 jan 2011 **/
45 /** # Version 6.0 : from : 28 sep 2014 **/
46 /** to : 28 sep 2014 **/
47 /** **/
48 /************************************************************/
49
50 /*
51 ** The defines and includes.
52 */
53
54 #define DGRAPH
55
56 #include "module.h"
57 #include "common.h"
58 #include "dgraph.h"
59 #include "dgraph_fold.h"
60 #include "dgraph_fold_comm.h"
61
62 /******************************/
63 /* */
64 /* This routine handles */
65 /* distributed source graphs. */
66 /* */
67 /******************************/
68
69 /* This routine builds a folded graph by merging graph
70 ** data to the processes of the first half or to the
71 ** second half of the communicator.
72 ** The key value of the folded communicator is not
73 ** changed as it is not relevant.
74 ** It returns:
75 ** - 0 : on success.
76 ** - !0 : on error.
77 */
78
79 int
dgraphFold(const Dgraph * restrict const orggrafptr,const int partval,Dgraph * restrict const fldgrafptr,const void * restrict const orgdataptr,void ** restrict const flddataptr,MPI_Datatype datatype)80 dgraphFold (
81 const Dgraph * restrict const orggrafptr,
82 const int partval, /* 0 for first half, 1 for second half */
83 Dgraph * restrict const fldgrafptr,
84 const void * restrict const orgdataptr, /* Un-based array of data which must be folded, e.g. coarmulttab */
85 void ** restrict const flddataptr, /* Un-based array of data which must be folded, e.g. coarmulttab */
86 MPI_Datatype datatype)
87 {
88 int fldprocnbr;
89 int fldprocnum; /* Index of local process in folded communicator */
90 int fldproccol; /* Color of receiver or not wanted in communicator */
91 MPI_Comm fldproccomm; /* Communicator of folded part */
92 int o;
93
94 fldprocnbr = (orggrafptr->procglbnbr + 1) / 2;
95 fldprocnum = orggrafptr->proclocnum;
96 if (partval == 1) {
97 fldprocnum = fldprocnum - fldprocnbr;
98 fldprocnbr = orggrafptr->procglbnbr - fldprocnbr;
99 }
100 fldproccol = ((fldprocnum >= 0) && (fldprocnum < fldprocnbr)) ? 0 : MPI_UNDEFINED;
101
102 if (MPI_Comm_split (orggrafptr->proccomm, fldproccol, fldprocnum, &fldproccomm) != MPI_SUCCESS) {
103 errorPrint ("dgraphFold: communication error");
104 return (1);
105 }
106
107 o = dgraphFold2 (orggrafptr, partval, fldgrafptr, fldproccomm, orgdataptr, flddataptr, datatype);
108 fldgrafptr->prockeyval = fldproccol; /* Key of folded communicator is always zero if no duplication occurs */
109
110 return (o);
111 }
112
113 int
dgraphFold2(const Dgraph * restrict const orggrafptr,const int partval,Dgraph * restrict const fldgrafptr,MPI_Comm fldproccomm,const void * restrict const orgdataptr,void ** restrict const flddataptr,MPI_Datatype datatype)114 dgraphFold2 (
115 const Dgraph * restrict const orggrafptr,
116 const int partval, /* 0 for first half, 1 for second half */
117 Dgraph * restrict const fldgrafptr,
118 MPI_Comm fldproccomm,
119 const void * restrict const orgdataptr, /* Un-based array of data which must be kept, e.g. coarmulttab */
120 void ** restrict const flddataptr, /* Un-based array of data which must be kept, e.g. coarmulttab */
121 MPI_Datatype datatype)
122 {
123 int fldcommtypval; /* Type of communication for this process */
124 DgraphFoldCommData * restrict fldcommdattab; /* Array of two communication data */
125 Gnum * restrict fldcommvrttab; /* Starting global send indices of communications */
126 Gnum * restrict fldvertidxtab; /* Start indices of vertex arrays */
127 Gnum * restrict fldedgeidxtab; /* Start indices of edge arrays */
128 Gnum * restrict fldedgecnttab; /* Number of edges exchanged during each communication */
129 Gnum * restrict fldedgecnptab; /* Temporary save for fldedgecnttab for MPI standard */
130 Gnum fldvertlocnbr; /* Number of vertices in local folded part */
131 Gnum fldedgelocsiz; /* (Upper bound of) number of edges in folded graph */
132 Gnum fldedlolocsiz; /* (Upper bound of) number of edge loads in folded graph */
133 int fldprocglbnbr;
134 int fldproclocnum; /* Index of local process in folded communicator */
135 int fldvertadjnbr;
136 Gnum * restrict fldvertadjtab; /* Array of global start indices for adjustment slots */
137 Gnum * restrict fldvertdlttab; /* Array of index adjustments for original global indices */
138 int cheklocval;
139 int chekglbval;
140 int commmax;
141 int commnbr;
142 int requnbr;
143 MPI_Request * restrict requtab;
144 int infosiz; /* Size of one information */
145
146 #ifdef SCOTCH_DEBUG_DGRAPH2
147 if (orggrafptr->vendloctax != (orggrafptr->vertloctax + 1)) {
148 errorPrint ("dgraphFold2: graph must be compact");
149 return (1);
150 }
151 #endif /* SCOTCH_DEBUG_DGRAPH2 */
152
153 fldprocglbnbr = (orggrafptr->procglbnbr + 1) / 2;
154 if (partval == 1) {
155 fldproclocnum = orggrafptr->proclocnum - fldprocglbnbr;
156 fldprocglbnbr = orggrafptr->procglbnbr - fldprocglbnbr;
157 }
158 else
159 fldproclocnum = orggrafptr->proclocnum;
160
161 fldcommtypval = ((fldproclocnum >= 0) && (fldproclocnum < fldprocglbnbr)) ? DGRAPHFOLDCOMMRECV : DGRAPHFOLDCOMMSEND;
162 if (orgdataptr != NULL)
163 MPI_Type_size (datatype, &infosiz);
164
165 cheklocval = 0;
166 fldcommdattab = NULL;
167 fldvertidxtab = NULL;
168 if (fldcommtypval == DGRAPHFOLDCOMMRECV) { /* If we are going to receive */
169 #ifdef SCOTCH_DEBUG_DGRAPH2
170 if (fldgrafptr == NULL) {
171 errorPrint ("dgraphFold2: invalid parameters (1)");
172 return (1);
173 }
174 if (fldproccomm == MPI_COMM_NULL) {
175 errorPrint ("dgraphFold2: invalid parameters (2)");
176 return (1);
177 }
178 #endif /* SCOTCH_DEBUG_DGRAPH2 */
179
180 memSet (fldgrafptr, 0, sizeof (Dgraph)); /* Pre-initialize graph fields */
181
182 fldgrafptr->proccomm = fldproccomm;
183 fldgrafptr->procglbnbr = fldprocglbnbr;
184 fldgrafptr->proclocnum = fldproclocnum;
185 fldgrafptr->flagval = DGRAPHFREEALL | DGRAPHVERTGROUP | DGRAPHEDGEGROUP; /* For premature freeing on error */
186
187 if (memAllocGroup ((void **) (void *) /* Allocate distributed graph private data */
188 &fldgrafptr->procdsptab, (size_t) ((fldprocglbnbr + 1) * sizeof (Gnum)),
189 &fldgrafptr->proccnttab, (size_t) (fldprocglbnbr * sizeof (Gnum)),
190 &fldgrafptr->procngbtab, (size_t) (fldprocglbnbr * sizeof (int)),
191 &fldgrafptr->procrcvtab, (size_t) (fldprocglbnbr * sizeof (int)),
192 &fldgrafptr->procsndtab, (size_t) (fldprocglbnbr * sizeof (int)), NULL) == NULL) {
193 errorPrint ("dgraphFold2: out of memory (1)");
194 cheklocval = 1;
195 }
196 else if (dgraphFoldComm (orggrafptr, partval, &commmax, &fldcommtypval, &fldcommdattab, &fldcommvrttab, /* Process can become a sender receiver */
197 fldgrafptr->proccnttab, &fldvertadjnbr, &fldvertadjtab, &fldvertdlttab) != 0) {
198 errorPrint ("dgraphFold2: cannot compute folding communications (1)");
199 cheklocval = 1;
200 }
201 else {
202 Gnum fldvelolocnbr;
203
204 if ((fldcommtypval & DGRAPHFOLDCOMMSEND) == 0) { /* If process is a normal receiver */
205 int i;
206
207 for (i = 0, fldvertlocnbr = 0; (i < commmax) && (fldcommdattab[i].procnum != -1); i ++)
208 fldvertlocnbr += fldcommdattab[i].vertnbr;
209 commnbr = i;
210
211 fldedgelocsiz = orggrafptr->edgeglbsmx * i; /* Upper bound on received edges */
212 if ((orggrafptr->degrglbmax > 0) && (fldvertlocnbr < (fldedgelocsiz / orggrafptr->degrglbmax))) /* Choose best upper bound on number of edges (avoid multiply overflow) */
213 fldedgelocsiz = fldvertlocnbr * orggrafptr->degrglbmax;
214
215 fldedgelocsiz += orggrafptr->edgelocnbr; /* Add local edges and vertices */
216 fldvertlocnbr += orggrafptr->vertlocnbr;
217 }
218 else { /* Process is a sender receiver */
219 fldvertlocnbr = fldcommvrttab[0] - orggrafptr->procvrttab[orggrafptr->proclocnum]; /* Communications will remove vertices */
220 fldedgelocsiz = orggrafptr->vertloctax[fldvertlocnbr + orggrafptr->baseval] - orggrafptr->baseval; /* Exact number of edges */
221
222 fldgrafptr->edgelocnbr =
223 fldgrafptr->edgelocsiz = fldedgelocsiz;
224 }
225 fldvelolocnbr = (orggrafptr->veloloctax != NULL) ? fldvertlocnbr : 0;
226
227 if (memAllocGroup ((void **) (void *) /* Allocate distributed graph public data */
228 &fldgrafptr->vertloctax, (size_t) ((fldvertlocnbr + 1) * sizeof (Gnum)),
229 &fldgrafptr->vnumloctax, (size_t) ( fldvertlocnbr * sizeof (Gnum)),
230 &fldgrafptr->veloloctax, (size_t) ( fldvelolocnbr * sizeof (Gnum)), NULL) == NULL) {
231 errorPrint ("dgraphFold2: out of memory (2)");
232 cheklocval = 1;
233 }
234 else if (fldgrafptr->vertloctax -= orggrafptr->baseval,
235 fldgrafptr->vnumloctax -= orggrafptr->baseval,
236 fldgrafptr->vendloctax = fldgrafptr->vertloctax + 1, /* Folded graph is compact */
237 fldgrafptr->veloloctax = ((orggrafptr->veloloctax != NULL) ? (fldgrafptr->veloloctax - orggrafptr->baseval) : NULL),
238 fldedlolocsiz = ((orggrafptr->edloloctax != NULL) ? fldedgelocsiz : 0),
239 (fldgrafptr->edgeloctax = memAlloc ((fldedgelocsiz + fldedlolocsiz) * sizeof (Gnum))) == NULL) { /* Allocate single array for both edge arrays */
240 errorPrint ("dgraphFold2: out of memory (3)");
241 cheklocval = 1;
242 }
243 else {
244 if (orgdataptr != NULL) {
245 if ((*flddataptr = (byte *) memAlloc (fldvertlocnbr * infosiz)) == NULL) {
246 errorPrint ("dgraphFold2: out of memory (4)");
247 cheklocval = 1;
248 }
249 }
250 fldgrafptr->edgeloctax -= orggrafptr->baseval; /* Do not care about the validity of edloloctax at this stage */
251 }
252 }
253 }
254 else { /* Process is a sender */
255 #ifdef SCOTCH_DEBUG_HDGRAPH2
256 if (fldproccomm != MPI_COMM_NULL) {
257 errorPrint ("dgraphFold2: invalid parameters (3)");
258 return (1);
259 }
260 #endif /* SCOTCH_DEBUG_HDGRAPH2 */
261
262 if (dgraphFoldComm (orggrafptr, partval, &commmax, &fldcommtypval, &fldcommdattab, &fldcommvrttab, NULL, NULL, NULL, NULL) != 0) {
263 errorPrint ("dgraphFold2: cannot compute folding communications (2)");
264 cheklocval = 1;
265 }
266 }
267
268 if ((cheklocval == 0) &&
269 (memAllocGroup ((void **) (void *) /* Allocate folding data */
270 &fldvertidxtab, (size_t) (commmax * sizeof (Gnum)),
271 &fldedgeidxtab, (size_t) (commmax * sizeof (Gnum)),
272 &fldedgecnttab, (size_t) (commmax * sizeof (Gnum)),
273 &fldedgecnptab, (size_t) (commmax * sizeof (Gnum)),
274 &requtab, (size_t) (commmax * DGRAPHFOLDTAGNBR * sizeof (MPI_Request)), NULL) == NULL)) {
275 errorPrint ("dgraphFold2: out of memory (5)");
276 cheklocval = 1;
277 }
278
279 #ifdef SCOTCH_DEBUG_DGRAPH1 /* Communication cannot be merged with a useful one */
280 if (MPI_Allreduce (&cheklocval, &chekglbval, 1, MPI_INT, MPI_MAX, orggrafptr->proccomm) != MPI_SUCCESS) {
281 errorPrint ("dgraphFold2: communication error (1)");
282 chekglbval = 1;
283 }
284 #else /* SCOTCH_DEBUG_DGRAPH1 */
285 chekglbval = cheklocval;
286 #endif /* SCOTCH_DEBUG_DGRAPH1 */
287 if (chekglbval != 0) {
288 if ((fldcommtypval & DGRAPHFOLDCOMMRECV) != 0) {
289 if (fldvertidxtab != NULL)
290 memFree (fldvertidxtab); /* Free group leader */
291 if (fldcommdattab != NULL)
292 memFree (fldcommdattab);
293 dgraphExit (fldgrafptr);
294 }
295 return (1);
296 }
297
298 requnbr = 0; /* Communications without further processing are placed at beginning of array */
299
300 if ((fldcommtypval & DGRAPHFOLDCOMMSEND) != 0) { /* If process is (also) a sender */
301 Gnum vertsndbas;
302 Gnum vertsndnbr;
303 int i;
304
305 vertsndnbr = ((fldcommtypval & DGRAPHFOLDCOMMRECV) != 0) ? (fldcommvrttab[0] - orggrafptr->procvrttab[orggrafptr->proclocnum]) : 0; /* If process is also a receiver, start sending after kept vertices */
306
307 for (i = 0, vertsndbas = orggrafptr->baseval; /* For all send communications to perform */
308 (i < commmax) && (fldcommdattab[i].procnum != -1) && (cheklocval == 0); i ++) {
309 vertsndbas += vertsndnbr;
310 vertsndnbr = fldcommdattab[i].vertnbr;
311
312 fldvertidxtab[i] = vertsndbas;
313 fldedgeidxtab[i] = orggrafptr->vertloctax[vertsndbas];
314 fldedgecnptab[i] = /* Save fldedgecnttab in temporary array to read it while MPI communication in progress */
315 fldedgecnttab[i] = orggrafptr->vertloctax[vertsndbas + vertsndnbr] - orggrafptr->vertloctax[vertsndbas]; /* Graph is compact */
316 if (MPI_Isend (&fldedgecnptab[i], 1, GNUM_MPI, fldcommdattab[i].procnum,
317 TAGFOLD + TAGVLBLLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS) {
318 errorPrint ("dgraphFold2: communication error (2)");
319 cheklocval = 1;
320 }
321 }
322 commnbr = i;
323
324 for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
325 if (MPI_Isend (orggrafptr->vertloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, fldcommdattab[i].procnum,
326 TAGFOLD + TAGVERTLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS) {
327 errorPrint ("dgraphFold2: communication error (3)");
328 cheklocval = 1;
329 }
330 }
331 for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
332 if (MPI_Isend (orggrafptr->edgeloctax + fldedgeidxtab[i], fldedgecnttab[i], GNUM_MPI, fldcommdattab[i].procnum,
333 TAGFOLD + TAGEDGELOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS) {
334 errorPrint ("dgraphFold2: communication error (4)");
335 cheklocval = 1;
336 }
337 }
338 if (orggrafptr->veloloctax != NULL) {
339 for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
340 if (MPI_Isend (orggrafptr->veloloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, fldcommdattab[i].procnum,
341 TAGFOLD + TAGVELOLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS) {
342 errorPrint ("dgraphFold2: communication error (5)");
343 cheklocval = 1;
344 }
345 }
346 }
347 for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
348 int procsndnum; /* Rank of process to send to */
349
350 procsndnum = fldcommdattab[i].procnum;
351 if ((orggrafptr->edloloctax != NULL) &&
352 (MPI_Isend (orggrafptr->edloloctax + fldedgeidxtab[i], fldedgecnttab[i], GNUM_MPI, procsndnum,
353 TAGFOLD + TAGEDLOLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
354 errorPrint ("dgraphFold2: communication error (6)");
355 cheklocval = 1;
356 }
357 else if ((orggrafptr->vnumloctax != NULL) &&
358 (MPI_Isend (orggrafptr->vnumloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, procsndnum,
359 TAGFOLD + TAGVNUMLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
360 errorPrint ("dgraphFold2: communication error (7)");
361 cheklocval = 1;
362 }
363 else if ((orgdataptr != NULL) &&
364 (MPI_Isend ((byte *) orgdataptr + ((fldvertidxtab[i] - orggrafptr->baseval) * infosiz), fldcommdattab[i].vertnbr, datatype, procsndnum,
365 TAGFOLD + TAGDATALOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
366 errorPrint ("dgraphFold2: communication error (8)");
367 cheklocval = 1;
368 }
369 }
370 } /* Communications of sender-receivers will be completed in the receiving phase */
371
372 if ((fldcommtypval & DGRAPHFOLDCOMMRECV) != 0) { /* If process is (also) a receiver */
373 Gnum orgvertlocnbr;
374 Gnum orgvertlocnnd;
375 Gnum orgvertlocmin;
376 Gnum orgvertlocmax;
377 Gnum fldvertlocadj;
378 Gnum fldvelolocsum;
379 Gnum fldedgelocnum;
380 Gnum fldedgelocnnd;
381 int fldprocnum;
382 int procngbmin;
383 int procngbmax;
384 int i;
385
386 const Gnum * restrict const orgedgeloctax = orggrafptr->edgeloctax;
387 Gnum * restrict const fldedgeloctax = fldgrafptr->edgeloctax;
388
389 fldgrafptr->procvrttab = fldgrafptr->procdsptab; /* Graph does not have holes */
390 fldgrafptr->procdsptab[0] = orggrafptr->baseval; /* Build private data of folded graph and array */
391 for (fldprocnum = 0; fldprocnum < fldprocglbnbr; fldprocnum ++) /* New subdomain indices start from baseval */
392 fldgrafptr->procdsptab[fldprocnum + 1] = fldgrafptr->procdsptab[fldprocnum] + fldgrafptr->proccnttab[fldprocnum];
393
394 if ((fldcommtypval & DGRAPHFOLDCOMMSEND) == 0) { /* If process is a normal receiver */
395 Gnum fldedgelocbas;
396 Gnum fldvertrcvbas;
397 Gnum fldvertrcvnbr;
398
399 for (i = 0, fldvertrcvbas = orggrafptr->vertlocnnd, fldvertrcvnbr = 0; /* For all receive communications to perform */
400 (i < commnbr) && (cheklocval == 0); i ++) {
401 fldvertrcvbas += fldvertrcvnbr;
402 fldvertrcvnbr = fldcommdattab[i].vertnbr;
403
404 fldvertidxtab[i] = fldvertrcvbas;
405 if (MPI_Irecv (&fldedgecnttab[i], 1, GNUM_MPI, fldcommdattab[i].procnum,
406 TAGFOLD + TAGVLBLLOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGENBR * commmax + i]) != MPI_SUCCESS) {
407 errorPrint ("dgraphFold2: communication error (9)");
408 cheklocval = 1;
409 }
410 }
411
412 for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) { /* Let these communications progress while we process the edge size messages */
413 if (MPI_Irecv (fldgrafptr->vertloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, fldcommdattab[i].procnum,
414 TAGFOLD + TAGVERTLOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGVERT * commmax + i]) != MPI_SUCCESS) {
415 errorPrint ("dgraphFold2: communication error (10)");
416 cheklocval = 1;
417 }
418 }
419
420 MPI_Waitall (commnbr, &requtab[DGRAPHFOLDTAGENBR * commmax], MPI_STATUSES_IGNORE);
421
422 for (i = 0, fldedgelocbas = orggrafptr->vertloctax[orggrafptr->vertlocnnd]; (i < commnbr) && (cheklocval == 0); i ++) {
423 fldedgeidxtab[i] = fldedgelocbas;
424 fldedgelocbas += fldedgecnttab[i];
425
426 if (MPI_Irecv (fldgrafptr->edgeloctax + fldedgeidxtab[i], fldedgecnttab[i], GNUM_MPI, fldcommdattab[i].procnum,
427 TAGFOLD + TAGEDGELOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGEDGE * commmax + i]) != MPI_SUCCESS) {
428 errorPrint ("dgraphFold2: communication error (11)");
429 cheklocval = 1;
430 }
431 }
432 fldgrafptr->edgelocnbr = /* Get number of local edges */
433 fldgrafptr->edgelocsiz = fldedgelocbas - orggrafptr->baseval;
434
435 if (orggrafptr->veloloctax != NULL) {
436 for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
437 if (MPI_Irecv (fldgrafptr->veloloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, fldcommdattab[i].procnum,
438 TAGFOLD + TAGVELOLOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGVELO * commmax + i]) != MPI_SUCCESS) {
439 errorPrint ("dgraphFold2: communication error (12)");
440 cheklocval = 1;
441 }
442 }
443 }
444 if (orggrafptr->edloloctax != NULL) {
445 fldgrafptr->edloloctax = fldgrafptr->edgeloctax + fldgrafptr->edgelocnbr; /* Set start index of edge load array */
446
447 for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
448 if (MPI_Irecv (fldgrafptr->edloloctax + fldedgeidxtab[i], fldedgecnttab[i], GNUM_MPI, fldcommdattab[i].procnum,
449 TAGFOLD + TAGEDLOLOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGEDLO * commmax + i]) != MPI_SUCCESS) {
450 errorPrint ("dgraphFold2: communication error (13)");
451 cheklocval = 1;
452 }
453 }
454 }
455 for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
456 int procrcvnum; /* Rank of process to receive from */
457 Gnum vertrcvnbr;
458
459 procrcvnum = fldcommdattab[i].procnum;
460 vertrcvnbr = fldcommdattab[i].vertnbr;
461 if ((orggrafptr->vnumloctax != NULL) &&
462 (MPI_Irecv (fldgrafptr->vnumloctax + fldvertidxtab[i], vertrcvnbr, GNUM_MPI, procrcvnum,
463 TAGFOLD + TAGVNUMLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
464 errorPrint ("dgraphFold2: communication error (14)");
465 cheklocval = 1;
466 }
467 else if ((orgdataptr != NULL) &&
468 (MPI_Irecv ((byte *) (*flddataptr) + ((fldvertidxtab[i] - orggrafptr->baseval) * infosiz), vertrcvnbr, datatype, procrcvnum,
469 TAGFOLD + TAGDATALOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
470 errorPrint ("dgraphFold2: communication error (15)");
471 cheklocval = 1;
472 }
473 }
474
475 orgvertlocnbr = orggrafptr->vertlocnbr; /* Process all local vertices */
476 orgvertlocnnd = orggrafptr->vertlocnnd;
477
478 if (orggrafptr->vnumloctax == NULL) { /* If original graph does not have vertex numbers, create remote parts of vertex number array */
479 Gnum fldvertlocnum;
480 Gnum fldvertlocadj;
481 int i;
482
483 Gnum * restrict const fldvnumloctax = fldgrafptr->vnumloctax;
484
485 for (i = 0, fldvertlocnum = orgvertlocnnd; i < commnbr; i ++) {
486 Gnum fldvertlocnnd;
487
488 for (fldvertlocnnd = fldvertlocnum + fldcommdattab[i].vertnbr, fldvertlocadj = fldcommvrttab[i];
489 fldvertlocnum < fldvertlocnnd; fldvertlocnum ++)
490 fldvnumloctax[fldvertlocnum] = fldvertlocadj ++;
491 }
492 }
493
494 fldedgelocnnd = orggrafptr->vertloctax[orggrafptr->vertlocnnd];
495 fldvelolocsum = orggrafptr->velolocsum; /* In case there are vertex loads, we keep all of existing load */
496 }
497 else { /* Receiver process is also a sender */
498 orgvertlocnbr = fldvertlocnbr; /* Process only remaining local vertices */
499 orgvertlocnnd = fldvertlocnbr + orggrafptr->baseval;
500
501 if (orggrafptr->veloloctax != NULL) { /* If original graph has vertex loads */
502 Gnum fldvertlocnum;
503
504 for (fldvertlocnum = orggrafptr->baseval, fldvelolocsum = 0; /* Accumulate load sum of remaining part */
505 fldvertlocnum < orgvertlocnnd; fldvertlocnum ++)
506 fldvelolocsum += orggrafptr->veloloctax[fldvertlocnum];
507 }
508 fldedgelocnnd = orggrafptr->vertloctax[orgvertlocnnd]; /* Reorder remaining local part of edge array */
509 if (orggrafptr->edloloctax != NULL)
510 fldgrafptr->edloloctax = fldgrafptr->edgeloctax + fldgrafptr->edgelocnbr; /* Set start index of edge load array */
511
512 commnbr = 0; /* Turn sender-receiver into normal receiver without any communications to perform */
513 }
514
515 for (procngbmin = 0, procngbmax = fldvertadjnbr; /* Initialize search accelerator */
516 procngbmax - procngbmin > 1; ) {
517 int procngbmed;
518
519 procngbmed = (procngbmax + procngbmin) / 2;
520 if (fldvertadjtab[procngbmed] <= orggrafptr->procvrttab[orggrafptr->proclocnum])
521 procngbmin = procngbmed;
522 else
523 procngbmax = procngbmed;
524 }
525 orgvertlocmin = fldvertadjtab[procngbmin];
526 orgvertlocmax = fldvertadjtab[procngbmax];
527 fldvertlocadj = fldvertdlttab[procngbmin];
528 for (fldedgelocnum = orggrafptr->baseval; fldedgelocnum < fldedgelocnnd; fldedgelocnum ++) {
529 Gnum orgvertlocend;
530
531 orgvertlocend = orgedgeloctax[fldedgelocnum];
532
533 if ((orgvertlocend >= orgvertlocmin) && /* If end vertex is local */
534 (orgvertlocend < orgvertlocmax))
535 fldedgeloctax[fldedgelocnum] = orgvertlocend + fldvertlocadj;
536 else { /* End vertex is not local */
537 int procngbnum;
538 int procngbmax;
539
540 for (procngbnum = 0, procngbmax = fldvertadjnbr;
541 procngbmax - procngbnum > 1; ) {
542 int procngbmed;
543
544 procngbmed = (procngbmax + procngbnum) / 2;
545 if (fldvertadjtab[procngbmed] <= orgvertlocend)
546 procngbnum = procngbmed;
547 else
548 procngbmax = procngbmed;
549 }
550 fldedgeloctax[fldedgelocnum] = orgvertlocend + fldvertdlttab[procngbnum];
551 }
552 }
553
554 if (orggrafptr->veloloctax != NULL) /* If original graph has vertex loads */
555 memCpy (fldgrafptr->veloloctax + orggrafptr->baseval, /* Copy local part of vertex load array */
556 orggrafptr->veloloctax + orggrafptr->baseval, orgvertlocnbr * sizeof (Gnum));
557
558 if (orggrafptr->edloloctax != NULL) /* If original graph has edge loads */
559 memCpy (fldgrafptr->edloloctax + orggrafptr->baseval, /* Copy local part of edge load array */
560 orggrafptr->edloloctax + orggrafptr->baseval,
561 (orggrafptr->vertloctax[orgvertlocnnd] - orggrafptr->baseval) * sizeof (Gnum));
562
563 if (orggrafptr->vnumloctax != NULL) /* If original graph has vertex numbers */
564 memCpy (fldgrafptr->vnumloctax + orggrafptr->baseval, /* Copy local part of vertex number array */
565 orggrafptr->vnumloctax + orggrafptr->baseval, orgvertlocnbr * sizeof (Gnum));
566 else { /* Build local part of vertex number array */
567 Gnum fldvertlocnum;
568 Gnum fldvertlocadj;
569
570 for (fldvertlocnum = orggrafptr->baseval, fldvertlocadj = orggrafptr->procvrttab[orggrafptr->proclocnum];
571 fldvertlocnum < orgvertlocnnd; fldvertlocnum ++)
572 fldgrafptr->vnumloctax[fldvertlocnum] = fldvertlocadj ++;
573 }
574
575 memCpy (fldgrafptr->vertloctax + orggrafptr->baseval, /* Copy local part of vertex array, since it is compact */
576 orggrafptr->vertloctax + orggrafptr->baseval, orgvertlocnbr * sizeof (Gnum)); /* Last value is not copied */
577 fldgrafptr->vertloctax[fldvertlocnbr + orggrafptr->baseval] = fldgrafptr->edgelocnbr + orggrafptr->baseval;
578
579 if (orgdataptr != NULL) /* If additional data present */
580 memCpy ((byte *) (*flddataptr), (byte *) orgdataptr, orgvertlocnbr * infosiz); /* Copy local part */
581
582 for (i = 0; i < commnbr; i ++) {
583 int j;
584
585 if (MPI_Waitany (commnbr, &requtab[DGRAPHFOLDTAGVERT * commmax], &j, MPI_STATUS_IGNORE) != MPI_SUCCESS) {
586 errorPrint ("dgraphFold2: communication error (16)");
587 cheklocval = 1;
588 }
589 else { /* Adjust first remote part of vertex array */
590 Gnum fldvertlocnum;
591 Gnum fldvertlocnnd;
592 Gnum fldvertlocadj;
593
594 Gnum * restrict const fldvertloctax = fldgrafptr->vertloctax;
595
596 fldvertlocnum = fldvertidxtab[j];
597 fldvertlocadj = fldedgeidxtab[j] - fldgrafptr->vertloctax[fldvertlocnum];
598
599 for (fldvertlocnnd = fldvertlocnum + fldcommdattab[j].vertnbr; fldvertlocnum < fldvertlocnnd; fldvertlocnum ++)
600 fldvertloctax[fldvertlocnum] += fldvertlocadj;
601 }
602 }
603
604 for (i = 0; i < commnbr; i ++) {
605 MPI_Status statdat;
606 int j;
607
608 if (MPI_Waitany (commnbr, &requtab[DGRAPHFOLDTAGEDGE * commmax], &j, &statdat) != MPI_SUCCESS) {
609 errorPrint ("dgraphFold2: communication error (17)");
610 cheklocval = 1;
611 }
612 else if (cheklocval == 0) { /* Adjust remote part(s) of edge array */
613 Gnum orgvertlocmin;
614 Gnum orgvertlocmax;
615 Gnum fldvertlocadj;
616 int procngbnum;
617 int procngbmax;
618
619 Gnum * restrict const fldedgeloctax = fldgrafptr->edgeloctax;
620
621 #ifdef SCOTCH_DEBUG_DGRAPH2
622 int fldedgercvnbr;
623
624 MPI_Get_count (&statdat, GNUM_MPI, &fldedgercvnbr);
625 if (fldedgercvnbr != fldedgecnttab[j]) {
626 errorPrint ("dgraphFold2: internal error (1)");
627 return (1);
628 }
629 #endif /* SCOTCH_DEBUG_DGRAPH2 */
630
631 for (procngbnum = 0, procngbmax = fldvertadjnbr; /* Initialize search accelerator */
632 procngbmax - procngbnum > 1; ) {
633 int procngbmed;
634
635 procngbmed = (procngbmax + procngbnum) / 2;
636 if (fldvertadjtab[procngbmed] <= fldcommvrttab[j])
637 procngbnum = procngbmed;
638 else
639 procngbmax = procngbmed;
640 }
641 orgvertlocmin = fldvertadjtab[procngbnum];
642 orgvertlocmax = fldvertadjtab[procngbmax];
643 fldvertlocadj = fldvertdlttab[procngbnum];
644 for (fldedgelocnum = fldedgeidxtab[j], fldedgelocnnd = fldedgelocnum + fldedgecnttab[j];
645 fldedgelocnum < fldedgelocnnd; fldedgelocnum ++) { /* Reorder end vertices */
646 Gnum orgvertlocend;
647
648 #ifdef SCOTCH_DEBUG_DGRAPH2
649 if (fldedgelocnum >= (fldgrafptr->edgelocnbr + orggrafptr->baseval)) {
650 errorPrint ("dgraphFold2: internal error (2)");
651 return (1);
652 }
653 #endif /* SCOTCH_DEBUG_DGRAPH2 */
654
655 orgvertlocend = fldedgeloctax[fldedgelocnum];
656
657 if ((orgvertlocend >= orgvertlocmin) && /* If end vertex is local */
658 (orgvertlocend < orgvertlocmax))
659 fldedgeloctax[fldedgelocnum] = orgvertlocend + fldvertlocadj;
660 else {
661 int procngbnum;
662 int procngbmax;
663
664 for (procngbnum = 0, procngbmax = fldvertadjnbr;
665 procngbmax - procngbnum > 1; ) {
666 int procngbmed;
667
668 procngbmed = (procngbmax + procngbnum) / 2;
669 if (fldvertadjtab[procngbmed] <= orgvertlocend)
670 procngbnum = procngbmed;
671 else
672 procngbmax = procngbmed;
673 }
674 fldedgeloctax[fldedgelocnum] = orgvertlocend + fldvertdlttab[procngbnum];
675 }
676 }
677 }
678 }
679
680 if (orggrafptr->veloloctax == NULL) /* If no vertex loads, reset graph vertex load to number of vertices */
681 fldvelolocsum = fldvertlocnbr;
682 else { /* Graph has vertex loads and load of local part has already been computed */
683 for (i = 0; i < commnbr; i ++) {
684 int j;
685
686 if (MPI_Waitany (commnbr, &requtab[DGRAPHFOLDTAGVELO * commmax], &j, MPI_STATUS_IGNORE) != MPI_SUCCESS) {
687 errorPrint ("dgraphFold2: communication error (18)");
688 cheklocval = 1;
689 }
690 else if (cheklocval == 0) { /* Accumulate vertex loads for received vertex load array */
691 Gnum fldvertlocnum;
692 Gnum fldvertlocnnd;
693
694 for (fldvertlocnum = fldvertidxtab[j], fldvertlocnnd = fldvertlocnum + fldcommdattab[j].vertnbr;
695 fldvertlocnum < fldvertlocnnd; fldvertlocnum ++)
696 fldvelolocsum += fldgrafptr->veloloctax[fldvertlocnum];
697 }
698 }
699 }
700
701 if ((fldcommtypval & DGRAPHFOLDCOMMSEND) == 0) { /* If process is a normal receiver, edge arrays may have been oversized */
702 Gnum fldedgeloctmp;
703
704 fldedgeloctmp = fldgrafptr->edgelocnbr;
705 if (orggrafptr->edloloctax != NULL) {
706 fldedgeloctmp *= 2;
707 if (MPI_Waitall (commnbr, &requtab[DGRAPHFOLDTAGEDLO * commmax], MPI_STATUSES_IGNORE) != MPI_SUCCESS) { /* Wait for edge load sub-arrays */
708 errorPrint ("dgraphFold2: communication error (19)");
709 cheklocval = 1;
710 }
711 }
712
713 fldgrafptr->edgeloctax = memRealloc (fldgrafptr->edgeloctax + orggrafptr->baseval, fldedgeloctmp * sizeof (Gnum));
714 fldgrafptr->edgeloctax -= orggrafptr->baseval;
715 if (orggrafptr->edloloctax != NULL)
716 fldgrafptr->edloloctax = fldgrafptr->edgeloctax + fldgrafptr->edgelocnbr;
717 }
718
719 fldgrafptr->baseval = orggrafptr->baseval;
720 fldgrafptr->vertlocnbr = fldvertlocnbr;
721 fldgrafptr->vertlocnnd = fldvertlocnbr + orggrafptr->baseval;
722 fldgrafptr->velolocsum = fldvelolocsum;
723 fldgrafptr->degrglbmax = orggrafptr->degrglbmax;
724 if (dgraphBuild4 (fldgrafptr) != 0) {
725 errorPrint ("dgraphFold2: cannot build folded graph");
726 dgraphExit (fldgrafptr);
727 return (1);
728 }
729
730 #ifdef SCOTCH_DEBUG_DGRAPH2
731 if (dgraphCheck (fldgrafptr) != 0) { /* Check graph consistency; vnumloctab is not checked so no need to wait for it */
732 errorPrint ("dgraphFold2: internal error (3)");
733 dgraphExit (fldgrafptr);
734 return (1);
735 }
736 #endif /* SCOTCH_DEBUG_DGRAPH2 */
737 }
738
739 memFree (fldcommdattab); /* Free group leader */
740
741 if (MPI_Waitall (requnbr, requtab, MPI_STATUSES_IGNORE) != MPI_SUCCESS) { /* Wait for all graph data to arrive because graph could be freed afterwards */
742 errorPrint ("dgraphFold2: communication error (20)");
743 cheklocval = 1;
744 }
745
746 memFree (fldvertidxtab); /* Free group leader including request array */
747
748 #ifdef SCOTCH_DEBUG_DGRAPH1 /* Communication cannot be merged with a useful one */
749 if (MPI_Allreduce (&cheklocval, &chekglbval, 1, MPI_INT, MPI_MAX, orggrafptr->proccomm) != MPI_SUCCESS) {
750 errorPrint ("dgraphFold2: communication error (21)");
751 chekglbval = 1;
752 }
753 #else /* SCOTCH_DEBUG_DGRAPH1 */
754 chekglbval = cheklocval;
755 #endif /* SCOTCH_DEBUG_DGRAPH1 */
756
757 return (chekglbval);
758 }
759