1 /*
2  * Copyright 1997, Regents of the University of Minnesota
3  *
4  * wave.c
5  *
6  * This file contains code for directed diffusion at the coarsest graph
7  *
8  * Started 5/19/97, Kirk, George
9  *
10  * $Id: wave.c 13946 2013-03-30 15:51:45Z karypis $
11  *
12  */
13 
14 #include <parmetislib.h>
15 
16 /*************************************************************************
17 * This function performs a k-way directed diffusion
18 **************************************************************************/
WavefrontDiffusion(ctrl_t * ctrl,graph_t * graph,idx_t * home)19 real_t WavefrontDiffusion(ctrl_t *ctrl, graph_t *graph, idx_t *home)
20 {
21   idx_t ii, i, j, k, l, nvtxs, nedges, nparts;
22   idx_t from, to, edge, done, nswaps, noswaps, totalv, wsize;
23   idx_t npasses, first, second, third, mind, maxd;
24   idx_t *xadj, *adjncy, *adjwgt, *where, *perm;
25   idx_t *rowptr, *colind, *ed, *psize;
26   real_t *transfer, *tmpvec;
27   real_t balance = -1.0, *load, *solution, *workspace;
28   real_t *nvwgt, *npwgts, flowFactor, cost, ubfactor;
29   matrix_t matrix;
30   ikv_t *cand;
31   idx_t ndirty, nclean, dptr, clean;
32 
33   nvtxs        = graph->nvtxs;
34   nedges       = graph->nedges;
35   xadj         = graph->xadj;
36   nvwgt        = graph->nvwgt;
37   adjncy       = graph->adjncy;
38   adjwgt       = graph->adjwgt;
39   where        = graph->where;
40   nparts       = ctrl->nparts;
41   ubfactor     = ctrl->ubvec[0];
42   matrix.nrows = nparts;
43 
44   flowFactor = 0.35;
45   flowFactor = (ctrl->mype == 2) ? 0.50 : flowFactor;
46   flowFactor = (ctrl->mype == 3) ? 0.75 : flowFactor;
47   flowFactor = (ctrl->mype == 4) ? 1.00 : flowFactor;
48 
49   /* allocate memory */
50   solution                   = rmalloc(6*nparts+2*nedges, "WavefrontDiffusion: solution");
51   tmpvec                     = solution + nparts;           /* nparts */
52   npwgts                     = solution + 2*nparts;         /* nparts */
53   load                       = solution + 3*nparts;         /* nparts */
54   matrix.values              = solution + 4*nparts;         /* nparts+nedges */
55   transfer = matrix.transfer = solution + 5*nparts + nedges /* nparts+nedges */;
56 
57   perm                   = imalloc(2*nvtxs+3*nparts+nedges+1, "WavefrontDiffusion: perm");
58   ed                     = perm + nvtxs;                  /* nvtxs */
59   psize                  = perm + 2*nvtxs;                /* nparts */
60   rowptr = matrix.rowptr = perm + 2*nvtxs + nparts;       /* nparts+1 */
61   colind = matrix.colind = perm + 2*nvtxs + 2*nparts + 1; /* nparts+nedges */
62 
63   /*GKTODO - Potential problem with this malloc */
64   wsize     = gk_max(sizeof(real_t)*6*nparts, sizeof(idx_t)*(nvtxs+2*nparts+1));
65   workspace = (real_t *)gk_malloc(wsize, "WavefrontDiffusion: workspace");
66   cand      = ikvmalloc(nvtxs, "WavefrontDiffusion: cand");
67 
68 
69   /* Populate empty subdomains */
70   iset(nparts, 0, psize);
71   for (i=0; i<nvtxs; i++)
72     psize[where[i]]++;
73 
74   for (l=0; l<nparts; l++) {
75     if (psize[l] == 0)
76       break;
77   }
78 
79   if (l < nparts) { /* there is at least an empty subdomain */
80     FastRandomPermute(nvtxs, perm, 1);
81     for (mind=0; mind<nparts; mind++) {
82       if (psize[mind] > 0)
83         continue;
84 
85       maxd = iargmax(nparts, psize);
86       if (psize[maxd] == 1)
87         break;  /* we cannot do anything if the heaviest subdomain contains one vertex! */
88       for (i=0; i<nvtxs; i++) {
89         k = perm[i];
90         if (where[k] == maxd) {
91           where[k] = mind;
92           psize[mind]++;
93           psize[maxd]--;
94           break;
95         }
96       }
97     }
98   }
99 
100   /* compute the external degrees of the vertices */
101   iset(nvtxs, 0, ed);
102   rset(nparts, 0.0, npwgts);
103   for (i=0; i<nvtxs; i++) {
104     npwgts[where[i]] += nvwgt[i];
105     for (j=xadj[i]; j<xadj[i+1]; j++)
106       ed[i] += (where[i] != where[adjncy[j]] ? adjwgt[j] : 0);
107   }
108 
109   ComputeLoad(graph, nparts, load, ctrl->tpwgts, 0);
110 
111 
112   /* zero out the tmpvec array */
113   rset(nparts, 0.0, tmpvec);
114 
115   npasses = gk_min(nparts/2, NGD_PASSES);
116   for (done=0, l=0; l<npasses; l++) {
117     /* Set-up and solve the diffusion equation */
118     nswaps = 0;
119 
120     /* Solve flow equations */
121     SetUpConnectGraph(graph, &matrix, (idx_t *)workspace);
122 
123     /* check for disconnected subdomains */
124     for(i=0; i<matrix.nrows; i++) {
125       if (matrix.rowptr[i]+1 == matrix.rowptr[i+1]) {
126         cost = (real_t)(ctrl->mype);
127         break;
128       }
129     }
130 
131     if (i == matrix.nrows) { /* if connected, proceed */
132       ConjGrad2(&matrix, load, solution, 0.001, workspace);
133       ComputeTransferVector(1, &matrix, solution, transfer, 0);
134 
135       GetThreeMax(nparts, load, &first, &second, &third);
136 
137       if (l%3 == 0) {
138         FastRandomPermute(nvtxs, perm, 1);
139       }
140       else {
141         /*****************************/
142         /* move dirty vertices first */
143         /*****************************/
144         ndirty = 0;
145         for (i=0; i<nvtxs; i++) {
146           if (where[i] != home[i])
147             ndirty++;
148         }
149 
150         dptr = 0;
151         for (i=0; i<nvtxs; i++) {
152           if (where[i] != home[i])
153             perm[dptr++] = i;
154           else
155             perm[ndirty++] = i;
156         }
157 
158         PASSERT(ctrl, ndirty == nvtxs);
159         ndirty = dptr;
160         nclean = nvtxs-dptr;
161         FastRandomPermute(ndirty, perm, 0);
162         FastRandomPermute(nclean, perm+ndirty, 0);
163       }
164 
165       if (ctrl->mype == 0) {
166         for (j=nvtxs, k=0, ii=0; ii<nvtxs; ii++) {
167           i = perm[ii];
168           if (ed[i] != 0) {
169             cand[k].key = -ed[i];
170             cand[k++].val = i;
171           }
172           else {
173             cand[--j].key = 0;
174             cand[j].val = i;
175           }
176         }
177         ikvsorti(k, cand);
178       }
179 
180 
181       for (ii=0; ii<nvtxs/3; ii++) {
182         i = (ctrl->mype == 0) ? cand[ii].val : perm[ii];
183         from = where[i];
184 
185         /* don't move out the last vertex in a subdomain */
186         if (psize[from] == 1)
187           continue;
188 
189         clean = (from == home[i]) ? 1 : 0;
190 
191         /* only move from top three or dirty vertices */
192         if (from != first && from != second && from != third && clean)
193           continue;
194 
195         /* Scatter the sparse transfer row into the dense tmpvec row */
196         for (j=rowptr[from]+1; j<rowptr[from+1]; j++)
197           tmpvec[colind[j]] = transfer[j];
198 
199         for (j=xadj[i]; j<xadj[i+1]; j++) {
200           to = where[adjncy[j]];
201           if (from != to) {
202             if (tmpvec[to] > (flowFactor * nvwgt[i])) {
203               tmpvec[to] -= nvwgt[i];
204               INC_DEC(psize[to], psize[from], 1);
205               INC_DEC(npwgts[to], npwgts[from], nvwgt[i]);
206               INC_DEC(load[to], load[from], nvwgt[i]);
207               where[i] = to;
208               nswaps++;
209 
210               /* Update external degrees */
211               ed[i] = 0;
212               for (k=xadj[i]; k<xadj[i+1]; k++) {
213                 edge = adjncy[k];
214                 ed[i] += (to != where[edge] ? adjwgt[k] : 0);
215 
216                 if (where[edge] == from)
217                   ed[edge] += adjwgt[k];
218                 if (where[edge] == to)
219                   ed[edge] -= adjwgt[k];
220               }
221               break;
222             }
223           }
224         }
225 
226         /* Gather the dense tmpvec row into the sparse transfer row */
227         for (j=rowptr[from]+1; j<rowptr[from+1]; j++) {
228           transfer[j] = tmpvec[colind[j]];
229           tmpvec[colind[j]] = 0.0;
230         }
231       }
232     }
233 
234     if (l % 2 == 1) {
235       balance = rmax(nparts, npwgts)*nparts;
236       if (balance < ubfactor + 0.035)
237         done = 1;
238 
239       if (GlobalSESum(ctrl, done) > 0)
240         break;
241 
242       noswaps = (nswaps > 0 ? 0 : 1);
243       if (GlobalSESum(ctrl, noswaps) > ctrl->npes/2)
244         break;
245     }
246   }
247 
248   graph->mincut = ComputeSerialEdgeCut(graph);
249   totalv        = Mc_ComputeSerialTotalV(graph, home);
250   cost          = ctrl->ipc_factor * (real_t)graph->mincut + ctrl->redist_factor * (real_t)totalv;
251 
252 
253 CleanUpAndExit:
254   gk_free((void **)&solution, (void **)&perm, (void **)&workspace, (void **)&cand, LTERM);
255 
256   return cost;
257 }
258 
259