/*
 * Copyright 1997, Regents of the University of Minnesota
 *
 * mesh.c
 *
 * This file contains routines for constructing the dual graph of a mesh.
 * Assumes that each processor has at least one mesh element.
 *
 * Started 10/19/94
 * George
 *
 * $Id: mesh.c 10575 2011-07-14 14:46:42Z karypis $
 *
 */

#include <parmetislib.h>
18
19 /*************************************************************************
20 * This function converts a mesh into a dual graph
21 **************************************************************************/
ParMETIS_V3_Mesh2Dual(idx_t * elmdist,idx_t * eptr,idx_t * eind,idx_t * numflag,idx_t * ncommon,idx_t ** r_xadj,idx_t ** r_adjncy,MPI_Comm * comm)22 int ParMETIS_V3_Mesh2Dual(idx_t *elmdist, idx_t *eptr, idx_t *eind,
23 idx_t *numflag, idx_t *ncommon, idx_t **r_xadj,
24 idx_t **r_adjncy, MPI_Comm *comm)
25 {
26 idx_t i, j, jj, k, kk, m;
27 idx_t npes, mype, pe, count, mask, pass;
28 idx_t nelms, lnns, my_nns, node;
29 idx_t firstelm, firstnode, lnode, nrecv, nsend;
30 idx_t *scounts, *rcounts, *sdispl, *rdispl;
31 idx_t *nodedist, *nmap, *auxarray;
32 idx_t *gnptr, *gnind, *nptr, *nind, *myxadj=NULL, *myadjncy = NULL;
33 idx_t *sbuffer, *rbuffer, *htable;
34 ikv_t *nodelist, *recvbuffer;
35 idx_t maxcount, *ind, *wgt;
36 idx_t gmaxnode, gminnode;
37 size_t curmem;
38
39 gk_malloc_init();
40 curmem = gk_GetCurMemoryUsed();
41
42 /* Get basic comm info */
43 gkMPI_Comm_size(*comm, &npes);
44 gkMPI_Comm_rank(*comm, &mype);
45
46
47 nelms = elmdist[mype+1]-elmdist[mype];
48
49 if (*numflag > 0)
50 ChangeNumberingMesh(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);
51
52 mask = (1<<11)-1;
53
54 /*****************************/
55 /* Determine number of nodes */
56 /*****************************/
57 gminnode = GlobalSEMinComm(*comm, imin(eptr[nelms], eind));
58 for (i=0; i<eptr[nelms]; i++)
59 eind[i] -= gminnode;
60
61 gmaxnode = GlobalSEMaxComm(*comm, imax(eptr[nelms], eind));
62
63
64 /**************************/
65 /* Check for input errors */
66 /**************************/
67 ASSERT(nelms > 0);
68
69 /* construct node distribution array */
70 nodedist = ismalloc(npes+1, 0, "nodedist");
71 for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
72 k = j/(npes-i);
73 nodedist[i+1] = nodedist[i]+k;
74 j -= k;
75 }
76 my_nns = nodedist[mype+1]-nodedist[mype];
77 firstnode = nodedist[mype];
78
79 nodelist = ikvmalloc(eptr[nelms], "nodelist");
80 auxarray = imalloc(eptr[nelms], "auxarray");
81 htable = ismalloc(gk_max(my_nns, mask+1), -1, "htable");
82 scounts = imalloc(npes, "scounts");
83 rcounts = imalloc(npes, "rcounts");
84 sdispl = imalloc(npes+1, "sdispl");
85 rdispl = imalloc(npes+1, "rdispl");
86
87
88 /*********************************************/
89 /* first find a local numbering of the nodes */
90 /*********************************************/
91 for (i=0; i<nelms; i++) {
92 for (j=eptr[i]; j<eptr[i+1]; j++) {
93 nodelist[j].key = eind[j];
94 nodelist[j].val = j;
95 auxarray[j] = i; /* remember the local element ID that uses this node */
96 }
97 }
98 ikvsorti(eptr[nelms], nodelist);
99
100 for (count=1, i=1; i<eptr[nelms]; i++) {
101 if (nodelist[i].key > nodelist[i-1].key)
102 count++;
103 }
104
105 lnns = count;
106 nmap = imalloc(lnns, "nmap");
107
108 /* renumber the nodes of the elements array */
109 count = 1;
110 nmap[0] = nodelist[0].key;
111 eind[nodelist[0].val] = 0;
112 nodelist[0].val = auxarray[nodelist[0].val]; /* Store the local element ID */
113 for (i=1; i<eptr[nelms]; i++) {
114 if (nodelist[i].key > nodelist[i-1].key) {
115 nmap[count] = nodelist[i].key;
116 count++;
117 }
118 eind[nodelist[i].val] = count-1;
119 nodelist[i].val = auxarray[nodelist[i].val]; /* Store the local element ID */
120 }
121 gkMPI_Barrier(*comm);
122
123 /**********************************************************/
124 /* perform comms necessary to construct node-element list */
125 /**********************************************************/
126 iset(npes, 0, scounts);
127 for (pe=i=0; i<eptr[nelms]; i++) {
128 while (nodelist[i].key >= nodedist[pe+1])
129 pe++;
130 scounts[pe] += 2;
131 }
132 ASSERT(pe < npes);
133
134 gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm);
135
136 icopy(npes, scounts, sdispl);
137 MAKECSR(i, npes, sdispl);
138
139 icopy(npes, rcounts, rdispl);
140 MAKECSR(i, npes, rdispl);
141
142 ASSERT(sdispl[npes] == eptr[nelms]*2);
143
144 nrecv = rdispl[npes]/2;
145 recvbuffer = ikvmalloc(gk_max(1, nrecv), "recvbuffer");
146
147 gkMPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_T, (void *)recvbuffer,
148 rcounts, rdispl, IDX_T, *comm);
149
150 /**************************************/
151 /* construct global node-element list */
152 /**************************************/
153 gnptr = ismalloc(my_nns+1, 0, "gnptr");
154
155 for (i=0; i<npes; i++) {
156 for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
157 lnode = recvbuffer[j].key-firstnode;
158 ASSERT(lnode >= 0 && lnode < my_nns)
159
160 gnptr[lnode]++;
161 }
162 }
163 MAKECSR(i, my_nns, gnptr);
164
165 gnind = imalloc(gk_max(1, gnptr[my_nns]), "gnind");
166 for (pe=0; pe<npes; pe++) {
167 firstelm = elmdist[pe];
168 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
169 lnode = recvbuffer[j].key-firstnode;
170 gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
171 }
172 }
173 SHIFTCSR(i, my_nns, gnptr);
174
175
176 /*********************************************************/
177 /* send the node-element info to the relevant processors */
178 /*********************************************************/
179 iset(npes, 0, scounts);
180
181 /* use a hash table to ensure that each node is sent to a proc only once */
182 for (pe=0; pe<npes; pe++) {
183 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
184 lnode = recvbuffer[j].key-firstnode;
185 if (htable[lnode] == -1) {
186 scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
187 htable[lnode] = 1;
188 }
189 }
190
191 /* now reset the hash table */
192 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
193 lnode = recvbuffer[j].key-firstnode;
194 htable[lnode] = -1;
195 }
196 }
197
198
199 gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm);
200
201 icopy(npes, scounts, sdispl);
202 MAKECSR(i, npes, sdispl);
203
204 /* create the send buffer */
205 nsend = sdispl[npes];
206 sbuffer = imalloc(gk_max(1, nsend), "sbuffer");
207
208 count = 0;
209 for (pe=0; pe<npes; pe++) {
210 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
211 lnode = recvbuffer[j].key-firstnode;
212 if (htable[lnode] == -1) {
213 for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
214 if (k == gnptr[lnode])
215 sbuffer[count++] = -1*(gnind[k]+1);
216 else
217 sbuffer[count++] = gnind[k];
218 }
219 htable[lnode] = 1;
220 }
221 }
222 ASSERT(count == sdispl[pe+1]);
223
224 /* now reset the hash table */
225 for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
226 lnode = recvbuffer[j].key-firstnode;
227 htable[lnode] = -1;
228 }
229 }
230
231 icopy(npes, rcounts, rdispl);
232 MAKECSR(i, npes, rdispl);
233
234 nrecv = rdispl[npes];
235 rbuffer = imalloc(gk_max(1, nrecv), "rbuffer");
236
237 gkMPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_T, (void *)rbuffer,
238 rcounts, rdispl, IDX_T, *comm);
239
240 k = -1;
241 nptr = ismalloc(lnns+1, 0, "nptr");
242 nind = rbuffer;
243 for (pe=0; pe<npes; pe++) {
244 for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
245 if (nind[j] < 0) {
246 k++;
247 nind[j] = (-1*nind[j])-1;
248 }
249 nptr[k]++;
250 }
251 }
252 MAKECSR(i, lnns, nptr);
253
254 ASSERT(k+1 == lnns);
255 ASSERT(nptr[lnns] == nrecv)
256
257 myxadj = *r_xadj = (idx_t *)malloc(sizeof(idx_t)*(nelms+1));
258 if (myxadj == NULL)
259 gk_errexit(SIGMEM, "Failed to allocate memory for the dual graph's xadj array.\n");
260 iset(nelms+1, 0, myxadj);
261
262 iset(mask+1, -1, htable);
263
264 firstelm = elmdist[mype];
265
266 /* Two passes -- in first pass, simply find out the memory requirements */
267 maxcount = 200;
268 ind = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: ind");
269 wgt = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: wgt");
270
271 for (pass=0; pass<2; pass++) {
272 for (i=0; i<nelms; i++) {
273 for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
274 node = eind[j];
275
276 for (k=nptr[node]; k<nptr[node+1]; k++) {
277 if ((kk=nind[k]) == firstelm+i)
278 continue;
279
280 m = htable[(kk&mask)];
281
282 if (m == -1) {
283 ind[count] = kk;
284 wgt[count] = 1;
285 htable[(kk&mask)] = count++;
286 }
287 else {
288 if (ind[m] == kk) {
289 wgt[m]++;
290 }
291 else {
292 for (jj=0; jj<count; jj++) {
293 if (ind[jj] == kk) {
294 wgt[jj]++;
295 break;
296 }
297 }
298 if (jj == count) {
299 ind[count] = kk;
300 wgt[count++] = 1;
301 }
302 }
303 }
304
305 /* Adjust the memory.
306 This will be replaced by a idxrealloc() when GKlib will be incorporated */
307 if (count == maxcount-1) {
308 maxcount *= 2;
309 ind = irealloc(ind, maxcount, "ParMETIS_V3_Mesh2Dual: ind");
310 wgt = irealloc(wgt, maxcount, "ParMETIS_V3_Mesh2Dual: wgt");
311 }
312 }
313 }
314
315 for (j=0; j<count; j++) {
316 htable[(ind[j]&mask)] = -1;
317 if (wgt[j] >= *ncommon) {
318 if (pass == 0)
319 myxadj[i]++;
320 else
321 myadjncy[myxadj[i]++] = ind[j];
322 }
323 }
324 }
325
326 if (pass == 0) {
327 MAKECSR(i, nelms, myxadj);
328 myadjncy = *r_adjncy = (idx_t *)malloc(sizeof(idx_t)*myxadj[nelms]);
329 if (myadjncy == NULL)
330 gk_errexit(SIGMEM, "Failed to allocate memory for dual graph's adjncy array.\n");
331 }
332 else {
333 SHIFTCSR(i, nelms, myxadj);
334 }
335 }
336
337 /*****************************************/
338 /* correctly renumber the elements array */
339 /*****************************************/
340 for (i=0; i<eptr[nelms]; i++)
341 eind[i] = nmap[eind[i]] + gminnode;
342
343 if (*numflag == 1)
344 ChangeNumberingMesh(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);
345
346 /* do not free nodelist, recvbuffer, rbuffer */
347 gk_free((void **)&nodedist, &nodelist, &auxarray, &htable, &scounts, &rcounts,
348 &sdispl, &rdispl, &nmap, &recvbuffer, &gnptr, &gnind, &sbuffer, &rbuffer,
349 &nptr, &ind, &wgt, LTERM);
350
351 if (gk_GetCurMemoryUsed() - curmem > 0) {
352 printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n",
353 (ssize_t)(gk_GetCurMemoryUsed() - curmem));
354 }
355 gk_malloc_cleanup(0);
356
357 return METIS_OK;
358 }
359
360