/*
 * Copyright 1997, Regents of the University of Minnesota
 *
 * mesh.c
 *
 * This file contains routines for constructing the dual graph of a mesh.
 * Assumes that each processor has at least one mesh element.
 *
 * Started 10/19/94
 * George
 *
 * $Id: mesh.c 10575 2011-07-14 14:46:42Z karypis $
 *
 */

#include <parmetislib.h>


/*************************************************************************
* This function converts a mesh into a dual graph
**************************************************************************/
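/*
 * Example usage (a minimal sketch; the mesh arrays, their contents, and the
 * communicator below are hypothetical placeholders, not defined in this file).
 * On every rank, elmdist[] gives the range of elements owned by each processor,
 * eptr[]/eind[] describe the local elements in CSR form, and two elements become
 * adjacent in the dual graph if they share at least *ncommon nodes:
 *
 *   idx_t *xadj, *adjncy;
 *   idx_t numflag = 0;   // 0-based numbering
 *   idx_t ncommon = 2;   // e.g., triangles that share an edge
 *   ParMETIS_V3_Mesh2Dual(elmdist, eptr, eind, &numflag, &ncommon,
 *                         &xadj, &adjncy, &comm);
 *   // ... use the local piece of the dual graph in xadj/adjncy ...
 *   free(xadj);    // the returned arrays are allocated with plain malloc() below
 *   free(adjncy);
 */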
int ParMETIS_V3_Mesh2Dual(idx_t *elmdist, idx_t *eptr, idx_t *eind,
                 idx_t *numflag, idx_t *ncommon, idx_t **r_xadj,
                 idx_t **r_adjncy, MPI_Comm *comm)
{
  idx_t i, j, jj, k, kk, m;
  idx_t npes, mype, pe, count, mask, pass;
  idx_t nelms, lnns, my_nns, node;
  idx_t firstelm, firstnode, lnode, nrecv, nsend;
  idx_t *scounts, *rcounts, *sdispl, *rdispl;
  idx_t *nodedist, *nmap, *auxarray;
  idx_t *gnptr, *gnind, *nptr, *nind, *myxadj=NULL, *myadjncy = NULL;
  idx_t *sbuffer, *rbuffer, *htable;
  ikv_t *nodelist, *recvbuffer;
  idx_t maxcount, *ind, *wgt;
  idx_t gmaxnode, gminnode;
  size_t curmem;

  gk_malloc_init();
  curmem = gk_GetCurMemoryUsed();

  /* Get basic comm info */
  gkMPI_Comm_size(*comm, &npes);
  gkMPI_Comm_rank(*comm, &mype);


  nelms = elmdist[mype+1]-elmdist[mype];

  if (*numflag > 0)
    ChangeNumberingMesh(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);

  mask = (1<<11)-1;

  /*****************************/
  /* Determine number of nodes */
  /*****************************/
  gminnode = GlobalSEMinComm(*comm, imin(eptr[nelms], eind));
  for (i=0; i<eptr[nelms]; i++)
    eind[i] -= gminnode;

  gmaxnode = GlobalSEMaxComm(*comm, imax(eptr[nelms], eind));
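  /* At this point the node ids in eind[] are zero-based; the gminnode shift is
     undone in the renumbering loop just before returning. */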


  /**************************/
  /* Check for input errors */
  /**************************/
  ASSERT(nelms > 0);

  /* construct node distribution array */
  nodedist = ismalloc(npes+1, 0, "nodedist");
  for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
    k = j/(npes-i);
    nodedist[i+1] = nodedist[i]+k;
    j -= k;
  }
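  /* For example, with gmaxnode+1 == 10 global nodes and npes == 3 this yields
     nodedist = {0, 3, 6, 10}; rank pe owns the node range
     [nodedist[pe], nodedist[pe+1]). */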
  my_nns = nodedist[mype+1]-nodedist[mype];
  firstnode = nodedist[mype];

  nodelist = ikvmalloc(eptr[nelms], "nodelist");
  auxarray = imalloc(eptr[nelms], "auxarray");
  htable   = ismalloc(gk_max(my_nns, mask+1), -1, "htable");
  scounts  = imalloc(npes, "scounts");
  rcounts  = imalloc(npes, "rcounts");
  sdispl   = imalloc(npes+1, "sdispl");
  rdispl   = imalloc(npes+1, "rdispl");


  /*********************************************/
  /* first find a local numbering of the nodes */
  /*********************************************/
  for (i=0; i<nelms; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++) {
      nodelist[j].key = eind[j];
      nodelist[j].val = j;
      auxarray[j]     = i; /* remember the local element ID that uses this node */
    }
  }
  ikvsorti(eptr[nelms], nodelist);
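  /* After sorting by global node id, duplicate occurrences of a node are
     adjacent, so the loops below can count the distinct nodes referenced
     locally and assign them consecutive local numbers 0..lnns-1. */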

  for (count=1, i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key)
      count++;
  }

  lnns = count;
  nmap = imalloc(lnns, "nmap");

  /* renumber the nodes of the elements array */
  count = 1;
  nmap[0] = nodelist[0].key;
  eind[nodelist[0].val] = 0;
  nodelist[0].val = auxarray[nodelist[0].val];  /* Store the local element ID */
  for (i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key) {
      nmap[count] = nodelist[i].key;
      count++;
    }
    eind[nodelist[i].val] = count-1;
    nodelist[i].val = auxarray[nodelist[i].val];  /* Store the local element ID */
  }
  gkMPI_Barrier(*comm);

  /**********************************************************/
  /* perform comms necessary to construct node-element list */
  /**********************************************************/
  iset(npes, 0, scounts);
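  /* Each nodelist entry is an ikv_t pair (global node id, local element id)
     that is shipped as two idx_t values, hence the counts advance by 2; the
     destination pe is the rank that owns the node according to nodedist[]. */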
  for (pe=i=0; i<eptr[nelms]; i++) {
    while (nodelist[i].key >= nodedist[pe+1])
      pe++;
    scounts[pe] += 2;
  }
  ASSERT(pe < npes);

  gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  ASSERT(sdispl[npes] == eptr[nelms]*2);

  nrecv = rdispl[npes]/2;
  recvbuffer = ikvmalloc(gk_max(1, nrecv), "recvbuffer");

  gkMPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_T, (void *)recvbuffer,
      rcounts, rdispl, IDX_T, *comm);

  /**************************************/
  /* construct global node-element list */
  /**************************************/
  gnptr = ismalloc(my_nns+1, 0, "gnptr");

  for (i=0; i<npes; i++) {
    for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      ASSERT(lnode >= 0 && lnode < my_nns);

      gnptr[lnode]++;
    }
  }
  MAKECSR(i, my_nns, gnptr);

  gnind = imalloc(gk_max(1, gnptr[my_nns]), "gnind");
  for (pe=0; pe<npes; pe++) {
    firstelm = elmdist[pe];
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
    }
  }
  SHIFTCSR(i, my_nns, gnptr);
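  /* gnptr[] was advanced while gnind[] was being filled; SHIFTCSR moves the
     pointers back one slot so that gnptr[lnode]..gnptr[lnode+1] again brackets
     the element ids incident on local node lnode. */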


  /*********************************************************/
  /* send the node-element info to the relevant processors */
  /*********************************************************/
  iset(npes, 0, scounts);

  /* use a hash table to ensure that each node is sent to a proc only once */
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
        htable[lnode] = 1;
      }
    }

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }


  gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  /* create the send buffer */
  nsend = sdispl[npes];
  sbuffer = imalloc(gk_max(1, nsend), "sbuffer");
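  /* Pack, for each distinct node requested by pe, the list of elements that
     touch it. The first element of every per-node list is stored as -(id+1);
     this negative marker lets the receiving rank recover the list boundaries
     when unpacking rbuffer below. */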

  count = 0;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
          if (k == gnptr[lnode])
            sbuffer[count++] = -1*(gnind[k]+1);
          else
            sbuffer[count++] = gnind[k];
        }
        htable[lnode] = 1;
      }
    }
    ASSERT(count == sdispl[pe+1]);

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  nrecv   = rdispl[npes];
  rbuffer = imalloc(gk_max(1, nrecv), "rbuffer");

  gkMPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_T, (void *)rbuffer,
      rcounts, rdispl, IDX_T, *comm);

  k = -1;
  nptr = ismalloc(lnns+1, 0, "nptr");
  nind = rbuffer;
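  /* Unpack the replies. A negative entry flags the start of the element list
     of the next local node. Nodes are partitioned across ranks in increasing
     id order (nodedist[]) and were requested in sorted order, so the lists
     arrive in local-node order 0..lnns-1, which k walks through while the
     list lengths are accumulated into nptr[]. */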
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
      if (nind[j] < 0) {
        k++;
        nind[j] = (-1*nind[j])-1;
      }
      nptr[k]++;
    }
  }
  MAKECSR(i, lnns, nptr);

  ASSERT(k+1 == lnns);
  ASSERT(nptr[lnns] == nrecv);

  myxadj = *r_xadj = (idx_t *)malloc(sizeof(idx_t)*(nelms+1));
  if (myxadj == NULL)
    gk_errexit(SIGMEM, "Failed to allocate memory for the dual graph's xadj array.\n");
  iset(nelms+1, 0, myxadj);

  iset(mask+1, -1, htable);

  firstelm = elmdist[mype];

  /* Two passes -- in first pass, simply find out the memory requirements */
  maxcount = 200;
  ind = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: ind");
  wgt = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: wgt");
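  /* Pass 0 only counts, for every local element, how many other elements share
     at least *ncommon nodes with it (filling myxadj); pass 1 repeats the work
     and writes the neighbor ids into myadjncy. The htable[] array, indexed by
     (element id & mask), caches the position of a candidate neighbor in
     ind[]/wgt[], so most lookups avoid the linear collision-handling search. */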

  for (pass=0; pass<2; pass++) {
    for (i=0; i<nelms; i++) {
      for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
        node = eind[j];

        for (k=nptr[node]; k<nptr[node+1]; k++) {
          if ((kk=nind[k]) == firstelm+i)
            continue;

          m = htable[(kk&mask)];

          if (m == -1) {
            ind[count] = kk;
            wgt[count] = 1;
            htable[(kk&mask)] = count++;
          }
          else {
            if (ind[m] == kk) {
              wgt[m]++;
            }
            else {
              for (jj=0; jj<count; jj++) {
                if (ind[jj] == kk) {
                  wgt[jj]++;
                  break;
                }
              }
              if (jj == count) {
                ind[count]   = kk;
                wgt[count++] = 1;
              }
            }
          }

          /* Grow the scratch arrays if needed.
             This will be replaced by an idxrealloc() once GKlib is incorporated. */
          if (count == maxcount-1) {
            maxcount *= 2;
            ind = irealloc(ind, maxcount, "ParMETIS_V3_Mesh2Dual: ind");
            wgt = irealloc(wgt, maxcount, "ParMETIS_V3_Mesh2Dual: wgt");
          }
        }
      }

      for (j=0; j<count; j++) {
        htable[(ind[j]&mask)] = -1;
        if (wgt[j] >= *ncommon) {
          if (pass == 0)
            myxadj[i]++;
          else
            myadjncy[myxadj[i]++] = ind[j];
        }
      }
    }

    if (pass == 0) {
      MAKECSR(i, nelms, myxadj);
      myadjncy = *r_adjncy = (idx_t *)malloc(sizeof(idx_t)*myxadj[nelms]);
      if (myadjncy == NULL)
        gk_errexit(SIGMEM, "Failed to allocate memory for dual graph's adjncy array.\n");
    }
    else {
      SHIFTCSR(i, nelms, myxadj);
    }
  }

  /*****************************************/
  /* correctly renumber the elements array */
  /*****************************************/
  for (i=0; i<eptr[nelms]; i++)
    eind[i] = nmap[eind[i]] + gminnode;

  if (*numflag == 1)
    ChangeNumberingMesh(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);

  /* note: nind is only an alias of rbuffer, so it is not freed separately */
  gk_free((void **)&nodedist, &nodelist, &auxarray, &htable, &scounts, &rcounts,
      &sdispl, &rdispl, &nmap, &recvbuffer, &gnptr, &gnind, &sbuffer, &rbuffer,
      &nptr, &ind, &wgt, LTERM);

  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zd bytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));
  }
  gk_malloc_cleanup(0);

  return METIS_OK;
}
