/* Copyright 2007-2011,2014 IPB, Universite de Bordeaux, INRIA & CNRS
**
** This file is part of the Scotch software package for static mapping,
** graph partitioning and sparse matrix ordering.
**
** This software is governed by the CeCILL-C license under French law
** and abiding by the rules of distribution of free software. You can
** use, modify and/or redistribute the software under the terms of the
** CeCILL-C license as circulated by CEA, CNRS and INRIA at the following
** URL: "http://www.cecill.info".
**
** As a counterpart to the access to the source code and rights to copy,
** modify and redistribute granted by the license, users are provided
** only with a limited warranty and the software's author, the holder of
** the economic rights, and the successive licensors have only limited
** liability.
**
** In this respect, the user's attention is drawn to the risks associated
** with loading, using, modifying and/or developing or reproducing the
** software by the user in light of its specific status of free software,
** that may mean that it is complicated to manipulate, and that also
** therefore means that it is reserved for developers and experienced
** professionals having in-depth computer knowledge. Users are therefore
** encouraged to load and test the software's suitability as regards
** their requirements in conditions enabling the security of their
** systems and/or data to be ensured and, more generally, to use and
** operate it in the same conditions as regards security.
**
** The fact that you are presently reading this means that you have had
** knowledge of the CeCILL-C license and that you accept its terms.
*/
/************************************************************/
/**                                                        **/
/**   NAME       : dgraph_fold.c                           **/
/**                                                        **/
/**   AUTHOR     : Francois PELLEGRINI                     **/
/**                                                        **/
/**   FUNCTION   : This module handles the distributed     **/
/**                source graph folding function.          **/
/**                                                        **/
/**   DATES      : # Version 5.0  : from : 10 aug 2006     **/
/**                                 to   : 27 jun 2008     **/
/**                # Version 5.1  : from : 12 nov 2008     **/
/**                                 to   : 04 jan 2011     **/
/**                # Version 6.0  : from : 28 sep 2014     **/
/**                                 to   : 28 sep 2014     **/
/**                                                        **/
/************************************************************/

/*
**  The defines and includes.
*/

#define DGRAPH

#include "module.h"
#include "common.h"
#include "dgraph.h"
#include "dgraph_fold.h"
#include "dgraph_fold_comm.h"

/******************************/
/*                            */
/* This routine handles       */
/* distributed source graphs. */
/*                            */
/******************************/

/* This routine builds a folded graph by merging graph
** data onto the processes of either the first or the
** second half of the communicator.
** The key value of the folded communicator is left
** unchanged, as it is not relevant.
** It returns:
** - 0   : on success.
** - !0  : on error.
*/

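/* Illustrative usage sketch (not part of the library): a hypothetical caller
** folds a distributed graph together with one companion pair of Gnum values
** per vertex. The call is collective over the original communicator; only the
** processes of the selected half get a valid folded graph. Names such as
** "orggrafdat", "coarmulttab", "fldmultptr" and "coarmultdatatype" are
** assumptions made for this example only.
**
**   MPI_Datatype      coarmultdatatype;          (two Gnum values per vertex)
**   Dgraph            fldgrafdat;                (folded graph to be built)
**   void *            fldmultptr;                (receives the folded data array)
**
**   MPI_Type_contiguous (2, GNUM_MPI, &coarmultdatatype);
**   MPI_Type_commit     (&coarmultdatatype);
**   if (dgraphFold (&orggrafdat, 0, &fldgrafdat, coarmulttab,
**                   &fldmultptr, coarmultdatatype) != 0)
**     errorPrint ("caller: could not fold graph");
**   MPI_Type_free (&coarmultdatatype);
*/
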
int
dgraphFold (
const Dgraph * restrict const orggrafptr,
const int                     partval,            /* 0 for first half, 1 for second half */
Dgraph * restrict const       fldgrafptr,
const void * restrict const   orgdataptr,         /* Un-based array of data which must be folded, e.g. coarmulttab */
void ** restrict const        flddataptr,         /* Pointer to un-based folded data array to be built, e.g. coarmulttab */
MPI_Datatype                  datatype)
{
  int               fldprocnbr;
  int               fldprocnum;                   /* Index of local process in folded communicator   */
  int               fldproccol;                   /* Color of receiver or not wanted in communicator */
  MPI_Comm          fldproccomm;                  /* Communicator of folded part                     */
  int               o;

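  /* The folded communicator keeps ceil (procglbnbr / 2) processes when folding  */
  /* onto the first half, and the remaining floor (procglbnbr / 2) processes     */
  /* when folding onto the second half; processes outside the selected half get  */
  /* color MPI_UNDEFINED and are left out of the split communicator.             */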
  fldprocnbr = (orggrafptr->procglbnbr + 1) / 2;
  fldprocnum = orggrafptr->proclocnum;
  if (partval == 1) {
    fldprocnum = fldprocnum - fldprocnbr;
    fldprocnbr = orggrafptr->procglbnbr - fldprocnbr;
  }
  fldproccol = ((fldprocnum >= 0) && (fldprocnum < fldprocnbr)) ? 0 : MPI_UNDEFINED;

  if (MPI_Comm_split (orggrafptr->proccomm, fldproccol, fldprocnum, &fldproccomm) != MPI_SUCCESS) {
    errorPrint ("dgraphFold: communication error");
    return     (1);
  }

  o = dgraphFold2 (orggrafptr, partval, fldgrafptr, fldproccomm, orgdataptr, flddataptr, datatype);
  fldgrafptr->prockeyval = fldproccol;            /* Key of folded communicator is always zero if no duplication occurs */

  return (o);
}

int
dgraphFold2 (
const Dgraph * restrict const orggrafptr,
const int                     partval,            /* 0 for first half, 1 for second half */
Dgraph * restrict const       fldgrafptr,
MPI_Comm                      fldproccomm,
const void * restrict const   orgdataptr,         /* Un-based array of data which must be kept, e.g. coarmulttab */
void ** restrict const        flddataptr,         /* Pointer to un-based folded data array to be built, e.g. coarmulttab */
MPI_Datatype                  datatype)
{
  int                           fldcommtypval;    /* Type of communication for this process                 */
  DgraphFoldCommData * restrict fldcommdattab;    /* Array of two communication data                        */
  Gnum * restrict               fldcommvrttab;    /* Starting global send indices of communications         */
  Gnum * restrict               fldvertidxtab;    /* Start indices of vertex arrays                         */
  Gnum * restrict               fldedgeidxtab;    /* Start indices of edge arrays                           */
  Gnum * restrict               fldedgecnttab;    /* Number of edges exchanged during each communication    */
  Gnum * restrict               fldedgecnptab;    /* Temporary save for fldedgecnttab for MPI standard      */
  Gnum                          fldvertlocnbr;    /* Number of vertices in local folded part                */
  Gnum                          fldedgelocsiz;    /* (Upper bound of) number of edges in folded graph       */
  Gnum                          fldedlolocsiz;    /* (Upper bound of) number of edge loads in folded graph  */
  int                           fldprocglbnbr;
  int                           fldproclocnum;    /* Index of local process in folded communicator          */
  int                           fldvertadjnbr;
  Gnum * restrict               fldvertadjtab;    /* Array of global start indices for adjustment slots     */
  Gnum * restrict               fldvertdlttab;    /* Array of index adjustments for original global indices */
  int                           cheklocval;
  int                           chekglbval;
  int                           commmax;
  int                           commnbr;
  int                           requnbr;
  MPI_Request * restrict        requtab;
  int                           infosiz;          /* Size of one information                                */

#ifdef SCOTCH_DEBUG_DGRAPH2
  if (orggrafptr->vendloctax != (orggrafptr->vertloctax + 1)) {
    errorPrint ("dgraphFold2: graph must be compact");
    return     (1);
  }
#endif /* SCOTCH_DEBUG_DGRAPH2 */

  fldprocglbnbr = (orggrafptr->procglbnbr + 1) / 2;
  if (partval == 1) {
    fldproclocnum = orggrafptr->proclocnum - fldprocglbnbr;
    fldprocglbnbr = orggrafptr->procglbnbr - fldprocglbnbr;
  }
  else
    fldproclocnum = orggrafptr->proclocnum;

  fldcommtypval = ((fldproclocnum >= 0) && (fldproclocnum < fldprocglbnbr)) ? DGRAPHFOLDCOMMRECV : DGRAPHFOLDCOMMSEND;
  if (orgdataptr != NULL)
    MPI_Type_size (datatype, &infosiz);

  cheklocval    = 0;
  fldcommdattab = NULL;
  fldvertidxtab = NULL;
  if (fldcommtypval == DGRAPHFOLDCOMMRECV) {      /* If we are going to receive */
#ifdef SCOTCH_DEBUG_DGRAPH2
    if (fldgrafptr == NULL) {
      errorPrint ("dgraphFold2: invalid parameters (1)");
      return     (1);
    }
    if (fldproccomm == MPI_COMM_NULL) {
      errorPrint ("dgraphFold2: invalid parameters (2)");
      return     (1);
    }
#endif /* SCOTCH_DEBUG_DGRAPH2 */

    memSet (fldgrafptr, 0, sizeof (Dgraph));      /* Pre-initialize graph fields */

    fldgrafptr->proccomm   = fldproccomm;
    fldgrafptr->procglbnbr = fldprocglbnbr;
    fldgrafptr->proclocnum = fldproclocnum;
    fldgrafptr->flagval    = DGRAPHFREEALL | DGRAPHVERTGROUP | DGRAPHEDGEGROUP; /* For premature freeing on error */

    if (memAllocGroup ((void **) (void *)         /* Allocate distributed graph private data */
                       &fldgrafptr->procdsptab, (size_t) ((fldprocglbnbr + 1) * sizeof (Gnum)),
                       &fldgrafptr->proccnttab, (size_t) (fldprocglbnbr       * sizeof (Gnum)),
                       &fldgrafptr->procngbtab, (size_t) (fldprocglbnbr       * sizeof (int)),
                       &fldgrafptr->procrcvtab, (size_t) (fldprocglbnbr       * sizeof (int)),
                       &fldgrafptr->procsndtab, (size_t) (fldprocglbnbr       * sizeof (int)), NULL) == NULL) {
      errorPrint ("dgraphFold2: out of memory (1)");
      cheklocval = 1;
    }
    else if (dgraphFoldComm (orggrafptr, partval, &commmax, &fldcommtypval, &fldcommdattab, &fldcommvrttab, /* Process can become a sender receiver */
                             fldgrafptr->proccnttab, &fldvertadjnbr, &fldvertadjtab, &fldvertdlttab) != 0) {
      errorPrint ("dgraphFold2: cannot compute folding communications (1)");
      cheklocval = 1;
    }
    else {
      Gnum              fldvelolocnbr;

      if ((fldcommtypval & DGRAPHFOLDCOMMSEND) == 0) { /* If process is a normal receiver */
        int               i;

        for (i = 0, fldvertlocnbr = 0; (i < commmax) && (fldcommdattab[i].procnum != -1); i ++)
          fldvertlocnbr += fldcommdattab[i].vertnbr;
        commnbr = i;

        fldedgelocsiz = orggrafptr->edgeglbsmx * i; /* Upper bound on received edges */
        if ((orggrafptr->degrglbmax > 0) && (fldvertlocnbr < (fldedgelocsiz / orggrafptr->degrglbmax))) /* Choose best upper bound on number of edges (avoid multiply overflow) */
          fldedgelocsiz = fldvertlocnbr * orggrafptr->degrglbmax;

        fldedgelocsiz += orggrafptr->edgelocnbr;  /* Add local edges and vertices */
        fldvertlocnbr += orggrafptr->vertlocnbr;
      }
      else {                                      /* Process is a sender receiver */
        fldvertlocnbr = fldcommvrttab[0] - orggrafptr->procvrttab[orggrafptr->proclocnum]; /* Communications will remove vertices   */
        fldedgelocsiz = orggrafptr->vertloctax[fldvertlocnbr + orggrafptr->baseval] - orggrafptr->baseval; /* Exact number of edges */

        fldgrafptr->edgelocnbr =
        fldgrafptr->edgelocsiz = fldedgelocsiz;
      }
      fldvelolocnbr = (orggrafptr->veloloctax != NULL) ? fldvertlocnbr : 0;

      if (memAllocGroup ((void **) (void *)       /* Allocate distributed graph public data */
                         &fldgrafptr->vertloctax, (size_t) ((fldvertlocnbr + 1) * sizeof (Gnum)),
                         &fldgrafptr->vnumloctax, (size_t) ( fldvertlocnbr      * sizeof (Gnum)),
                         &fldgrafptr->veloloctax, (size_t) ( fldvelolocnbr      * sizeof (Gnum)), NULL) == NULL) {
        errorPrint ("dgraphFold2: out of memory (2)");
        cheklocval = 1;
      }
      else if (fldgrafptr->vertloctax -= orggrafptr->baseval,
               fldgrafptr->vnumloctax -= orggrafptr->baseval,
               fldgrafptr->vendloctax  = fldgrafptr->vertloctax + 1, /* Folded graph is compact */
               fldgrafptr->veloloctax  = ((orggrafptr->veloloctax != NULL) ? (fldgrafptr->veloloctax - orggrafptr->baseval) : NULL),
               fldedlolocsiz = ((orggrafptr->edloloctax != NULL) ? fldedgelocsiz : 0),
               (fldgrafptr->edgeloctax = memAlloc ((fldedgelocsiz + fldedlolocsiz) * sizeof (Gnum))) == NULL) { /* Allocate single array for both edge arrays */
        errorPrint ("dgraphFold2: out of memory (3)");
        cheklocval = 1;
      }
      else {
        if (orgdataptr != NULL) {
          if ((*flddataptr = (byte *) memAlloc (fldvertlocnbr * infosiz)) == NULL) {
            errorPrint ("dgraphFold2: out of memory (4)");
            cheklocval = 1;
          }
        }
        fldgrafptr->edgeloctax -= orggrafptr->baseval; /* Do not care about the validity of edloloctax at this stage */
      }
    }
  }
  else {                                          /* Process is a sender */
#ifdef SCOTCH_DEBUG_HDGRAPH2
    if (fldproccomm != MPI_COMM_NULL) {
      errorPrint ("dgraphFold2: invalid parameters (3)");
      return     (1);
    }
#endif /* SCOTCH_DEBUG_HDGRAPH2 */

    if (dgraphFoldComm (orggrafptr, partval, &commmax, &fldcommtypval, &fldcommdattab, &fldcommvrttab, NULL, NULL, NULL, NULL) != 0) {
      errorPrint ("dgraphFold2: cannot compute folding communications (2)");
      cheklocval = 1;
    }
  }

  if ((cheklocval == 0) &&
      (memAllocGroup ((void **) (void *)          /* Allocate folding data */
                      &fldvertidxtab, (size_t) (commmax * sizeof (Gnum)),
                      &fldedgeidxtab, (size_t) (commmax * sizeof (Gnum)),
                      &fldedgecnttab, (size_t) (commmax * sizeof (Gnum)),
                      &fldedgecnptab, (size_t) (commmax * sizeof (Gnum)),
                      &requtab,       (size_t) (commmax * DGRAPHFOLDTAGNBR * sizeof (MPI_Request)), NULL) == NULL)) {
    errorPrint ("dgraphFold2: out of memory (5)");
    cheklocval = 1;
  }

#ifdef SCOTCH_DEBUG_DGRAPH1                       /* Communication cannot be merged with a useful one */
  if (MPI_Allreduce (&cheklocval, &chekglbval, 1, MPI_INT, MPI_MAX, orggrafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("dgraphFold2: communication error (1)");
    chekglbval = 1;
  }
#else /* SCOTCH_DEBUG_DGRAPH1 */
  chekglbval = cheklocval;
#endif /* SCOTCH_DEBUG_DGRAPH1 */
  if (chekglbval != 0) {
    if ((fldcommtypval & DGRAPHFOLDCOMMRECV) != 0) {
      if (fldvertidxtab != NULL)
        memFree (fldvertidxtab);                  /* Free group leader */
      if (fldcommdattab != NULL)
        memFree (fldcommdattab);
      dgraphExit (fldgrafptr);
    }
    return (1);
  }

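  /* Receive requests that are waited for by message class are stored in slots of */
  /* commmax requests each, indexed as requtab[tagclass * commmax + i]; requests  */
  /* needing no further processing are packed from the start, up to requnbr.      */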
  requnbr = 0;                                    /* Communications without further processing are placed at beginning of array */

  if ((fldcommtypval & DGRAPHFOLDCOMMSEND) != 0) { /* If process is (also) a sender */
    Gnum              vertsndbas;
    Gnum              vertsndnbr;
    int               i;

    vertsndnbr = ((fldcommtypval & DGRAPHFOLDCOMMRECV) != 0) ? (fldcommvrttab[0] - orggrafptr->procvrttab[orggrafptr->proclocnum]) : 0; /* If process is also a receiver, start sending after kept vertices */

    for (i = 0, vertsndbas = orggrafptr->baseval; /* For all send communications to perform */
         (i < commmax) && (fldcommdattab[i].procnum != -1) && (cheklocval == 0); i ++) {
      vertsndbas += vertsndnbr;
      vertsndnbr  = fldcommdattab[i].vertnbr;

      fldvertidxtab[i] = vertsndbas;
      fldedgeidxtab[i] = orggrafptr->vertloctax[vertsndbas];
      fldedgecnptab[i] =                          /* Save fldedgecnttab in temporary array to read it while MPI communication in progress */
      fldedgecnttab[i] = orggrafptr->vertloctax[vertsndbas + vertsndnbr] - orggrafptr->vertloctax[vertsndbas]; /* Graph is compact        */
      if (MPI_Isend (&fldedgecnptab[i], 1, GNUM_MPI, fldcommdattab[i].procnum,
                     TAGFOLD + TAGVLBLLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS) {
        errorPrint ("dgraphFold2: communication error (2)");
        cheklocval = 1;
      }
    }
    commnbr = i;

    for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
      if (MPI_Isend (orggrafptr->vertloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, fldcommdattab[i].procnum,
                     TAGFOLD + TAGVERTLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS) {
        errorPrint ("dgraphFold2: communication error (3)");
        cheklocval = 1;
      }
    }
    for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
      if (MPI_Isend (orggrafptr->edgeloctax + fldedgeidxtab[i], fldedgecnttab[i], GNUM_MPI, fldcommdattab[i].procnum,
                     TAGFOLD + TAGEDGELOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS) {
        errorPrint ("dgraphFold2: communication error (4)");
        cheklocval = 1;
      }
    }
    if (orggrafptr->veloloctax != NULL) {
      for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
        if  (MPI_Isend (orggrafptr->veloloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, fldcommdattab[i].procnum,
                        TAGFOLD + TAGVELOLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS) {
          errorPrint ("dgraphFold2: communication error (5)");
          cheklocval = 1;
        }
      }
    }
    for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
      int               procsndnum;               /* Rank of process to send to */

      procsndnum = fldcommdattab[i].procnum;
      if ((orggrafptr->edloloctax != NULL) &&
          (MPI_Isend (orggrafptr->edloloctax + fldedgeidxtab[i], fldedgecnttab[i], GNUM_MPI, procsndnum,
                      TAGFOLD + TAGEDLOLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
        errorPrint ("dgraphFold2: communication error (6)");
        cheklocval = 1;
      }
      else if ((orggrafptr->vnumloctax != NULL) &&
               (MPI_Isend (orggrafptr->vnumloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, procsndnum,
                           TAGFOLD + TAGVNUMLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
        errorPrint ("dgraphFold2: communication error (7)");
        cheklocval = 1;
      }
      else if ((orgdataptr != NULL)  &&
               (MPI_Isend ((byte *) orgdataptr + ((fldvertidxtab[i] - orggrafptr->baseval) * infosiz), fldcommdattab[i].vertnbr, datatype, procsndnum,
                           TAGFOLD + TAGDATALOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
        errorPrint ("dgraphFold2: communication error (8)");
        cheklocval = 1;
      }
    }
  }                                               /* Communications of sender-receivers will be completed in the receiving phase */

  if ((fldcommtypval & DGRAPHFOLDCOMMRECV) != 0) { /* If process is (also) a receiver */
    Gnum                orgvertlocnbr;
    Gnum                orgvertlocnnd;
    Gnum                orgvertlocmin;
    Gnum                orgvertlocmax;
    Gnum                fldvertlocadj;
    Gnum                fldvelolocsum;
    Gnum                fldedgelocnum;
    Gnum                fldedgelocnnd;
    int                 fldprocnum;
    int                 procngbmin;
    int                 procngbmax;
    int                 i;

    const Gnum * restrict const orgedgeloctax = orggrafptr->edgeloctax;
    Gnum * restrict const       fldedgeloctax = fldgrafptr->edgeloctax;

    fldgrafptr->procvrttab = fldgrafptr->procdsptab; /* Graph does not have holes                               */
    fldgrafptr->procdsptab[0] = orggrafptr->baseval; /* Build private data of folded graph and array            */
    for (fldprocnum = 0; fldprocnum < fldprocglbnbr; fldprocnum ++) /* New subdomain indices start from baseval */
      fldgrafptr->procdsptab[fldprocnum + 1] = fldgrafptr->procdsptab[fldprocnum] + fldgrafptr->proccnttab[fldprocnum];

    if ((fldcommtypval & DGRAPHFOLDCOMMSEND) == 0) { /* If process is a normal receiver */
      Gnum                fldedgelocbas;
      Gnum                fldvertrcvbas;
      Gnum                fldvertrcvnbr;

      for (i = 0, fldvertrcvbas = orggrafptr->vertlocnnd, fldvertrcvnbr = 0; /* For all receive communications to perform */
           (i < commnbr) && (cheklocval == 0); i ++) {
        fldvertrcvbas += fldvertrcvnbr;
        fldvertrcvnbr  = fldcommdattab[i].vertnbr;

        fldvertidxtab[i] = fldvertrcvbas;
        if (MPI_Irecv (&fldedgecnttab[i], 1, GNUM_MPI, fldcommdattab[i].procnum,
                       TAGFOLD + TAGVLBLLOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGENBR * commmax + i]) != MPI_SUCCESS) {
          errorPrint ("dgraphFold2: communication error (9)");
          cheklocval = 1;
        }
      }

      for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) { /* Let these communications progress while we process the edge size messages */
        if (MPI_Irecv (fldgrafptr->vertloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, fldcommdattab[i].procnum,
                       TAGFOLD + TAGVERTLOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGVERT * commmax + i]) != MPI_SUCCESS) {
          errorPrint ("dgraphFold2: communication error (10)");
          cheklocval = 1;
        }
      }

      MPI_Waitall (commnbr, &requtab[DGRAPHFOLDTAGENBR * commmax], MPI_STATUSES_IGNORE);

      for (i = 0, fldedgelocbas = orggrafptr->vertloctax[orggrafptr->vertlocnnd]; (i < commnbr) && (cheklocval == 0); i ++) {
        fldedgeidxtab[i] = fldedgelocbas;
        fldedgelocbas += fldedgecnttab[i];

        if (MPI_Irecv (fldgrafptr->edgeloctax + fldedgeidxtab[i], fldedgecnttab[i], GNUM_MPI, fldcommdattab[i].procnum,
                       TAGFOLD + TAGEDGELOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGEDGE * commmax + i]) != MPI_SUCCESS) {
          errorPrint ("dgraphFold2: communication error (11)");
          cheklocval = 1;
        }
      }
      fldgrafptr->edgelocnbr =                    /* Get number of local edges */
      fldgrafptr->edgelocsiz = fldedgelocbas - orggrafptr->baseval;

      if (orggrafptr->veloloctax != NULL) {
        for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
          if (MPI_Irecv (fldgrafptr->veloloctax + fldvertidxtab[i], fldcommdattab[i].vertnbr, GNUM_MPI, fldcommdattab[i].procnum,
                         TAGFOLD + TAGVELOLOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGVELO * commmax + i]) != MPI_SUCCESS) {
            errorPrint ("dgraphFold2: communication error (12)");
            cheklocval = 1;
          }
        }
      }
      if (orggrafptr->edloloctax != NULL) {
        fldgrafptr->edloloctax = fldgrafptr->edgeloctax + fldgrafptr->edgelocnbr; /* Set start index of edge load array */

        for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
          if (MPI_Irecv (fldgrafptr->edloloctax + fldedgeidxtab[i], fldedgecnttab[i], GNUM_MPI, fldcommdattab[i].procnum,
                         TAGFOLD + TAGEDLOLOCTAB, orggrafptr->proccomm, &requtab[DGRAPHFOLDTAGEDLO * commmax + i]) != MPI_SUCCESS) {
            errorPrint ("dgraphFold2: communication error (13)");
            cheklocval = 1;
          }
        }
      }
      for (i = 0; (i < commnbr) && (cheklocval == 0); i ++) {
        int               procrcvnum;             /* Rank of process to receive from */
        Gnum              vertrcvnbr;

        procrcvnum = fldcommdattab[i].procnum;
        vertrcvnbr = fldcommdattab[i].vertnbr;
        if ((orggrafptr->vnumloctax != NULL) &&
            (MPI_Irecv (fldgrafptr->vnumloctax + fldvertidxtab[i], vertrcvnbr, GNUM_MPI, procrcvnum,
                        TAGFOLD + TAGVNUMLOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
          errorPrint ("dgraphFold2: communication error (14)");
          cheklocval = 1;
        }
        else if ((orgdataptr != NULL) &&
                 (MPI_Irecv ((byte *) (*flddataptr) + ((fldvertidxtab[i] - orggrafptr->baseval) * infosiz), vertrcvnbr, datatype, procrcvnum,
                             TAGFOLD + TAGDATALOCTAB, orggrafptr->proccomm, &requtab[requnbr ++]) != MPI_SUCCESS)) {
          errorPrint ("dgraphFold2: communication error (15)");
          cheklocval = 1;
        }
      }

      orgvertlocnbr = orggrafptr->vertlocnbr;     /* Process all local vertices */
      orgvertlocnnd = orggrafptr->vertlocnnd;

      if (orggrafptr->vnumloctax == NULL) {       /* If original graph does not have vertex numbers, create remote parts of vertex number array */
        Gnum              fldvertlocnum;
        Gnum              fldvertlocadj;
        int               i;

        Gnum * restrict const fldvnumloctax = fldgrafptr->vnumloctax;

        for (i = 0, fldvertlocnum = orgvertlocnnd; i < commnbr; i ++) {
          Gnum              fldvertlocnnd;

          for (fldvertlocnnd = fldvertlocnum + fldcommdattab[i].vertnbr, fldvertlocadj = fldcommvrttab[i];
               fldvertlocnum < fldvertlocnnd; fldvertlocnum ++)
            fldvnumloctax[fldvertlocnum] = fldvertlocadj ++;
        }
      }

      fldedgelocnnd = orggrafptr->vertloctax[orggrafptr->vertlocnnd];
      fldvelolocsum = orggrafptr->velolocsum;     /* In case there are vertex loads, we keep all of existing load */
    }
    else {                                        /* Receiver process is also a sender     */
      orgvertlocnbr = fldvertlocnbr;              /* Process only remaining local vertices */
      orgvertlocnnd = fldvertlocnbr + orggrafptr->baseval;

      if (orggrafptr->veloloctax != NULL) {       /* If original graph has vertex loads */
        Gnum                fldvertlocnum;

        for (fldvertlocnum = orggrafptr->baseval, fldvelolocsum = 0; /* Accumulate load sum of remaining part */
             fldvertlocnum < orgvertlocnnd; fldvertlocnum ++)
          fldvelolocsum += orggrafptr->veloloctax[fldvertlocnum];
      }
      fldedgelocnnd = orggrafptr->vertloctax[orgvertlocnnd]; /* Reorder remaining local part of edge array */
      if (orggrafptr->edloloctax != NULL)
        fldgrafptr->edloloctax = fldgrafptr->edgeloctax + fldgrafptr->edgelocnbr; /* Set start index of edge load array */

      commnbr = 0;                                /* Turn sender-receiver into normal receiver without any communications to perform */
    }

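    /* Renumber the end vertices of the edge array part kept locally: a dichotomic */
    /* search on fldvertadjtab locates the adjustment slot that contains each end  */
    /* vertex, and the matching fldvertdlttab delta maps its old global index to   */
    /* its index in the folded graph.                                              */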
    for (procngbmin = 0, procngbmax = fldvertadjnbr; /* Initialize search accelerator */
         procngbmax - procngbmin > 1; ) {
      int               procngbmed;

      procngbmed = (procngbmax + procngbmin) / 2;
      if (fldvertadjtab[procngbmed] <= orggrafptr->procvrttab[orggrafptr->proclocnum])
        procngbmin = procngbmed;
      else
        procngbmax = procngbmed;
    }
    orgvertlocmin = fldvertadjtab[procngbmin];
    orgvertlocmax = fldvertadjtab[procngbmax];
    fldvertlocadj = fldvertdlttab[procngbmin];
    for (fldedgelocnum = orggrafptr->baseval; fldedgelocnum < fldedgelocnnd; fldedgelocnum ++) {
      Gnum              orgvertlocend;

      orgvertlocend = orgedgeloctax[fldedgelocnum];

      if ((orgvertlocend >= orgvertlocmin) &&     /* If end vertex is local */
          (orgvertlocend <  orgvertlocmax))
        fldedgeloctax[fldedgelocnum] = orgvertlocend + fldvertlocadj;
      else {                                      /* End vertex is not local */
        int               procngbnum;
        int               procngbmax;

        for (procngbnum = 0, procngbmax = fldvertadjnbr;
             procngbmax - procngbnum > 1; ) {
          int               procngbmed;

          procngbmed = (procngbmax + procngbnum) / 2;
          if (fldvertadjtab[procngbmed] <= orgvertlocend)
            procngbnum = procngbmed;
          else
            procngbmax = procngbmed;
        }
        fldedgeloctax[fldedgelocnum] = orgvertlocend + fldvertdlttab[procngbnum];
      }
    }

    if (orggrafptr->veloloctax != NULL)           /* If original graph has vertex loads             */
      memCpy (fldgrafptr->veloloctax + orggrafptr->baseval, /* Copy local part of vertex load array */
              orggrafptr->veloloctax + orggrafptr->baseval, orgvertlocnbr * sizeof (Gnum));

    if (orggrafptr->edloloctax != NULL)           /* If original graph has edge loads             */
      memCpy (fldgrafptr->edloloctax + orggrafptr->baseval, /* Copy local part of edge load array */
              orggrafptr->edloloctax + orggrafptr->baseval,
              (orggrafptr->vertloctax[orgvertlocnnd] - orggrafptr->baseval) * sizeof (Gnum));

    if (orggrafptr->vnumloctax != NULL)           /* If original graph has vertex numbers             */
      memCpy (fldgrafptr->vnumloctax + orggrafptr->baseval, /* Copy local part of vertex number array */
              orggrafptr->vnumloctax + orggrafptr->baseval, orgvertlocnbr * sizeof (Gnum));
    else {                                        /* Build local part of vertex number array */
      Gnum              fldvertlocnum;
      Gnum              fldvertlocadj;

      for (fldvertlocnum = orggrafptr->baseval, fldvertlocadj = orggrafptr->procvrttab[orggrafptr->proclocnum];
           fldvertlocnum < orgvertlocnnd; fldvertlocnum ++)
        fldgrafptr->vnumloctax[fldvertlocnum] = fldvertlocadj ++;
    }

    memCpy (fldgrafptr->vertloctax + orggrafptr->baseval, /* Copy local part of vertex array, since it is compact     */
            orggrafptr->vertloctax + orggrafptr->baseval, orgvertlocnbr * sizeof (Gnum)); /* Last value is not copied */
    fldgrafptr->vertloctax[fldvertlocnbr + orggrafptr->baseval] = fldgrafptr->edgelocnbr + orggrafptr->baseval;

    if (orgdataptr != NULL)                       /* If additional data present */
      memCpy ((byte *) (*flddataptr), (byte *) orgdataptr, orgvertlocnbr * infosiz); /* Copy local part */

    for (i = 0; i < commnbr; i ++) {
      int               j;

      if (MPI_Waitany (commnbr, &requtab[DGRAPHFOLDTAGVERT * commmax], &j, MPI_STATUS_IGNORE) != MPI_SUCCESS) {
        errorPrint ("dgraphFold2: communication error (16)");
        cheklocval = 1;
      }
      else {                                      /* Adjust first remote part of vertex array */
        Gnum              fldvertlocnum;
        Gnum              fldvertlocnnd;
        Gnum              fldvertlocadj;

        Gnum * restrict const fldvertloctax = fldgrafptr->vertloctax;

        fldvertlocnum = fldvertidxtab[j];
        fldvertlocadj = fldedgeidxtab[j] - fldgrafptr->vertloctax[fldvertlocnum];

        for (fldvertlocnnd = fldvertlocnum + fldcommdattab[j].vertnbr; fldvertlocnum < fldvertlocnnd; fldvertlocnum ++)
          fldvertloctax[fldvertlocnum] += fldvertlocadj;
      }
    }

    for (i = 0; i < commnbr; i ++) {
      MPI_Status        statdat;
      int               j;

      if (MPI_Waitany (commnbr, &requtab[DGRAPHFOLDTAGEDGE * commmax], &j, &statdat) != MPI_SUCCESS) {
        errorPrint ("dgraphFold2: communication error (17)");
        cheklocval = 1;
      }
      else if (cheklocval == 0) {                 /* Adjust remote part(s) of edge array */
        Gnum              orgvertlocmin;
        Gnum              orgvertlocmax;
        Gnum              fldvertlocadj;
        int               procngbnum;
        int               procngbmax;

        Gnum * restrict const fldedgeloctax = fldgrafptr->edgeloctax;

#ifdef SCOTCH_DEBUG_DGRAPH2
        int               fldedgercvnbr;

        MPI_Get_count (&statdat, GNUM_MPI, &fldedgercvnbr);
        if (fldedgercvnbr != fldedgecnttab[j]) {
          errorPrint  ("dgraphFold2: internal error (1)");
          return      (1);
        }
#endif /* SCOTCH_DEBUG_DGRAPH2 */

        for (procngbnum = 0, procngbmax = fldvertadjnbr; /* Initialize search accelerator */
             procngbmax - procngbnum > 1; ) {
          int               procngbmed;

          procngbmed = (procngbmax + procngbnum) / 2;
          if (fldvertadjtab[procngbmed] <= fldcommvrttab[j])
            procngbnum = procngbmed;
          else
            procngbmax = procngbmed;
        }
        orgvertlocmin = fldvertadjtab[procngbnum];
        orgvertlocmax = fldvertadjtab[procngbmax];
        fldvertlocadj = fldvertdlttab[procngbnum];
        for (fldedgelocnum = fldedgeidxtab[j], fldedgelocnnd = fldedgelocnum + fldedgecnttab[j];
             fldedgelocnum < fldedgelocnnd; fldedgelocnum ++) { /* Reorder end vertices */
          Gnum              orgvertlocend;

#ifdef SCOTCH_DEBUG_DGRAPH2
          if (fldedgelocnum >= (fldgrafptr->edgelocnbr + orggrafptr->baseval)) {
            errorPrint  ("dgraphFold2: internal error (2)");
            return      (1);
          }
#endif /* SCOTCH_DEBUG_DGRAPH2 */

          orgvertlocend = fldedgeloctax[fldedgelocnum];

          if ((orgvertlocend >= orgvertlocmin) && /* If end vertex is local */
              (orgvertlocend <  orgvertlocmax))
            fldedgeloctax[fldedgelocnum] = orgvertlocend + fldvertlocadj;
          else {
            int               procngbnum;
            int               procngbmax;

            for (procngbnum = 0, procngbmax = fldvertadjnbr;
                 procngbmax - procngbnum > 1; ) {
              int               procngbmed;

              procngbmed = (procngbmax + procngbnum) / 2;
              if (fldvertadjtab[procngbmed] <= orgvertlocend)
                procngbnum = procngbmed;
              else
                procngbmax = procngbmed;
            }
            fldedgeloctax[fldedgelocnum] = orgvertlocend + fldvertdlttab[procngbnum];
          }
        }
      }
    }

    if (orggrafptr->veloloctax == NULL)           /* If no vertex loads, reset graph vertex load to number of vertices */
      fldvelolocsum = fldvertlocnbr;
    else {                                        /* Graph has vertex loads and load of local part has already been computed */
      for (i = 0; i < commnbr; i ++) {
        int               j;

        if (MPI_Waitany (commnbr, &requtab[DGRAPHFOLDTAGVELO * commmax], &j, MPI_STATUS_IGNORE) != MPI_SUCCESS) {
          errorPrint ("dgraphFold2: communication error (18)");
          cheklocval = 1;
        }
        else if (cheklocval == 0) {               /* Accumulate vertex loads for received vertex load array */
          Gnum              fldvertlocnum;
          Gnum              fldvertlocnnd;

          for (fldvertlocnum = fldvertidxtab[j], fldvertlocnnd = fldvertlocnum + fldcommdattab[j].vertnbr;
               fldvertlocnum < fldvertlocnnd; fldvertlocnum ++)
            fldvelolocsum += fldgrafptr->veloloctax[fldvertlocnum];
        }
      }
    }

    if ((fldcommtypval & DGRAPHFOLDCOMMSEND) == 0) { /* If process is a normal receiver, edge arrays may have been oversized */
      Gnum                fldedgeloctmp;

      fldedgeloctmp = fldgrafptr->edgelocnbr;
      if (orggrafptr->edloloctax != NULL) {
        fldedgeloctmp *= 2;
        if (MPI_Waitall (commnbr, &requtab[DGRAPHFOLDTAGEDLO * commmax], MPI_STATUSES_IGNORE) != MPI_SUCCESS) { /* Wait for edge load sub-arrays */
          errorPrint ("dgraphFold2: communication error (19)");
          cheklocval = 1;
        }
      }

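      /* Trim the edge array to its final size; when edge loads are present, they */
      /* are stored right after the edges, hence the doubled reallocation size.   */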
      fldgrafptr->edgeloctax  = memRealloc (fldgrafptr->edgeloctax + orggrafptr->baseval, fldedgeloctmp * sizeof (Gnum));
      fldgrafptr->edgeloctax -= orggrafptr->baseval;
      if (orggrafptr->edloloctax != NULL)
        fldgrafptr->edloloctax = fldgrafptr->edgeloctax + fldgrafptr->edgelocnbr;
    }

    fldgrafptr->baseval    = orggrafptr->baseval;
    fldgrafptr->vertlocnbr = fldvertlocnbr;
    fldgrafptr->vertlocnnd = fldvertlocnbr + orggrafptr->baseval;
    fldgrafptr->velolocsum = fldvelolocsum;
    fldgrafptr->degrglbmax = orggrafptr->degrglbmax;
    if (dgraphBuild4 (fldgrafptr) != 0) {
      errorPrint ("dgraphFold2: cannot build folded graph");
      dgraphExit (fldgrafptr);
      return     (1);
    }

#ifdef SCOTCH_DEBUG_DGRAPH2
    if (dgraphCheck (fldgrafptr) != 0) {          /* Check graph consistency; vnumloctab is not checked so no need to wait for it */
      errorPrint ("dgraphFold2: internal error (3)");
      dgraphExit (fldgrafptr);
      return     (1);
    }
#endif /* SCOTCH_DEBUG_DGRAPH2 */
  }

  memFree (fldcommdattab);                        /* Free group leader */

  if (MPI_Waitall (requnbr, requtab, MPI_STATUSES_IGNORE) != MPI_SUCCESS) { /* Wait for all graph data to arrive because graph could be freed afterwards */
    errorPrint ("dgraphFold2: communication error (20)");
    cheklocval = 1;
  }

  memFree (fldvertidxtab);                        /* Free group leader including request array */

#ifdef SCOTCH_DEBUG_DGRAPH1                       /* Communication cannot be merged with a useful one */
  if (MPI_Allreduce (&cheklocval, &chekglbval, 1, MPI_INT, MPI_MAX, orggrafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("dgraphFold2: communication error (21)");
    chekglbval = 1;
  }
#else /* SCOTCH_DEBUG_DGRAPH1 */
  chekglbval = cheklocval;
#endif /* SCOTCH_DEBUG_DGRAPH1 */

  return (chekglbval);
}