1 /*
2 * Copyright 1997, Regents of the University of Minnesota
3 *
4 * wave.c
5 *
6 * This file contains code for directed diffusion at the coarsest graph
7 *
8 * Started 5/19/97, Kirk, George
9 *
10 * $Id: wave.c 13946 2013-03-30 15:51:45Z karypis $
11 *
12 */
13
14 #include <parmetislib.h>
15
16 /*************************************************************************
17 * This function performs a k-way directed diffusion
18 **************************************************************************/
WavefrontDiffusion(ctrl_t * ctrl,graph_t * graph,idx_t * home)19 real_t WavefrontDiffusion(ctrl_t *ctrl, graph_t *graph, idx_t *home)
20 {
21 idx_t ii, i, j, k, l, nvtxs, nedges, nparts;
22 idx_t from, to, edge, done, nswaps, noswaps, totalv, wsize;
23 idx_t npasses, first, second, third, mind, maxd;
24 idx_t *xadj, *adjncy, *adjwgt, *where, *perm;
25 idx_t *rowptr, *colind, *ed, *psize;
26 real_t *transfer, *tmpvec;
27 real_t balance = -1.0, *load, *solution, *workspace;
28 real_t *nvwgt, *npwgts, flowFactor, cost, ubfactor;
29 matrix_t matrix;
30 ikv_t *cand;
31 idx_t ndirty, nclean, dptr, clean;
32
33 nvtxs = graph->nvtxs;
34 nedges = graph->nedges;
35 xadj = graph->xadj;
36 nvwgt = graph->nvwgt;
37 adjncy = graph->adjncy;
38 adjwgt = graph->adjwgt;
39 where = graph->where;
40 nparts = ctrl->nparts;
41 ubfactor = ctrl->ubvec[0];
42 matrix.nrows = nparts;
43
44 flowFactor = 0.35;
45 flowFactor = (ctrl->mype == 2) ? 0.50 : flowFactor;
46 flowFactor = (ctrl->mype == 3) ? 0.75 : flowFactor;
47 flowFactor = (ctrl->mype == 4) ? 1.00 : flowFactor;
48
49 /* allocate memory */
50 solution = rmalloc(6*nparts+2*nedges, "WavefrontDiffusion: solution");
51 tmpvec = solution + nparts; /* nparts */
52 npwgts = solution + 2*nparts; /* nparts */
53 load = solution + 3*nparts; /* nparts */
54 matrix.values = solution + 4*nparts; /* nparts+nedges */
55 transfer = matrix.transfer = solution + 5*nparts + nedges /* nparts+nedges */;
56
57 perm = imalloc(2*nvtxs+3*nparts+nedges+1, "WavefrontDiffusion: perm");
58 ed = perm + nvtxs; /* nvtxs */
59 psize = perm + 2*nvtxs; /* nparts */
60 rowptr = matrix.rowptr = perm + 2*nvtxs + nparts; /* nparts+1 */
61 colind = matrix.colind = perm + 2*nvtxs + 2*nparts + 1; /* nparts+nedges */
62
63 /*GKTODO - Potential problem with this malloc */
64 wsize = gk_max(sizeof(real_t)*6*nparts, sizeof(idx_t)*(nvtxs+2*nparts+1));
65 workspace = (real_t *)gk_malloc(wsize, "WavefrontDiffusion: workspace");
66 cand = ikvmalloc(nvtxs, "WavefrontDiffusion: cand");
67
68
69 /* Populate empty subdomains */
70 iset(nparts, 0, psize);
71 for (i=0; i<nvtxs; i++)
72 psize[where[i]]++;
73
74 for (l=0; l<nparts; l++) {
75 if (psize[l] == 0)
76 break;
77 }
78
79 if (l < nparts) { /* there is at least an empty subdomain */
80 FastRandomPermute(nvtxs, perm, 1);
81 for (mind=0; mind<nparts; mind++) {
82 if (psize[mind] > 0)
83 continue;
84
85 maxd = iargmax(nparts, psize);
86 if (psize[maxd] == 1)
87 break; /* we cannot do anything if the heaviest subdomain contains one vertex! */
88 for (i=0; i<nvtxs; i++) {
89 k = perm[i];
90 if (where[k] == maxd) {
91 where[k] = mind;
92 psize[mind]++;
93 psize[maxd]--;
94 break;
95 }
96 }
97 }
98 }
99
100 /* compute the external degrees of the vertices */
101 iset(nvtxs, 0, ed);
102 rset(nparts, 0.0, npwgts);
103 for (i=0; i<nvtxs; i++) {
104 npwgts[where[i]] += nvwgt[i];
105 for (j=xadj[i]; j<xadj[i+1]; j++)
106 ed[i] += (where[i] != where[adjncy[j]] ? adjwgt[j] : 0);
107 }
108
109 ComputeLoad(graph, nparts, load, ctrl->tpwgts, 0);
110
111
112 /* zero out the tmpvec array */
113 rset(nparts, 0.0, tmpvec);
114
115 npasses = gk_min(nparts/2, NGD_PASSES);
116 for (done=0, l=0; l<npasses; l++) {
117 /* Set-up and solve the diffusion equation */
118 nswaps = 0;
119
120 /* Solve flow equations */
121 SetUpConnectGraph(graph, &matrix, (idx_t *)workspace);
122
123 /* check for disconnected subdomains */
124 for(i=0; i<matrix.nrows; i++) {
125 if (matrix.rowptr[i]+1 == matrix.rowptr[i+1]) {
126 cost = (real_t)(ctrl->mype);
127 break;
128 }
129 }
130
131 if (i == matrix.nrows) { /* if connected, proceed */
132 ConjGrad2(&matrix, load, solution, 0.001, workspace);
133 ComputeTransferVector(1, &matrix, solution, transfer, 0);
134
135 GetThreeMax(nparts, load, &first, &second, &third);
136
137 if (l%3 == 0) {
138 FastRandomPermute(nvtxs, perm, 1);
139 }
140 else {
141 /*****************************/
142 /* move dirty vertices first */
143 /*****************************/
144 ndirty = 0;
145 for (i=0; i<nvtxs; i++) {
146 if (where[i] != home[i])
147 ndirty++;
148 }
149
150 dptr = 0;
151 for (i=0; i<nvtxs; i++) {
152 if (where[i] != home[i])
153 perm[dptr++] = i;
154 else
155 perm[ndirty++] = i;
156 }
157
158 PASSERT(ctrl, ndirty == nvtxs);
159 ndirty = dptr;
160 nclean = nvtxs-dptr;
161 FastRandomPermute(ndirty, perm, 0);
162 FastRandomPermute(nclean, perm+ndirty, 0);
163 }
164
165 if (ctrl->mype == 0) {
166 for (j=nvtxs, k=0, ii=0; ii<nvtxs; ii++) {
167 i = perm[ii];
168 if (ed[i] != 0) {
169 cand[k].key = -ed[i];
170 cand[k++].val = i;
171 }
172 else {
173 cand[--j].key = 0;
174 cand[j].val = i;
175 }
176 }
177 ikvsorti(k, cand);
178 }
179
180
181 for (ii=0; ii<nvtxs/3; ii++) {
182 i = (ctrl->mype == 0) ? cand[ii].val : perm[ii];
183 from = where[i];
184
185 /* don't move out the last vertex in a subdomain */
186 if (psize[from] == 1)
187 continue;
188
189 clean = (from == home[i]) ? 1 : 0;
190
191 /* only move from top three or dirty vertices */
192 if (from != first && from != second && from != third && clean)
193 continue;
194
195 /* Scatter the sparse transfer row into the dense tmpvec row */
196 for (j=rowptr[from]+1; j<rowptr[from+1]; j++)
197 tmpvec[colind[j]] = transfer[j];
198
199 for (j=xadj[i]; j<xadj[i+1]; j++) {
200 to = where[adjncy[j]];
201 if (from != to) {
202 if (tmpvec[to] > (flowFactor * nvwgt[i])) {
203 tmpvec[to] -= nvwgt[i];
204 INC_DEC(psize[to], psize[from], 1);
205 INC_DEC(npwgts[to], npwgts[from], nvwgt[i]);
206 INC_DEC(load[to], load[from], nvwgt[i]);
207 where[i] = to;
208 nswaps++;
209
210 /* Update external degrees */
211 ed[i] = 0;
212 for (k=xadj[i]; k<xadj[i+1]; k++) {
213 edge = adjncy[k];
214 ed[i] += (to != where[edge] ? adjwgt[k] : 0);
215
216 if (where[edge] == from)
217 ed[edge] += adjwgt[k];
218 if (where[edge] == to)
219 ed[edge] -= adjwgt[k];
220 }
221 break;
222 }
223 }
224 }
225
226 /* Gather the dense tmpvec row into the sparse transfer row */
227 for (j=rowptr[from]+1; j<rowptr[from+1]; j++) {
228 transfer[j] = tmpvec[colind[j]];
229 tmpvec[colind[j]] = 0.0;
230 }
231 }
232 }
233
234 if (l % 2 == 1) {
235 balance = rmax(nparts, npwgts)*nparts;
236 if (balance < ubfactor + 0.035)
237 done = 1;
238
239 if (GlobalSESum(ctrl, done) > 0)
240 break;
241
242 noswaps = (nswaps > 0 ? 0 : 1);
243 if (GlobalSESum(ctrl, noswaps) > ctrl->npes/2)
244 break;
245 }
246 }
247
248 graph->mincut = ComputeSerialEdgeCut(graph);
249 totalv = Mc_ComputeSerialTotalV(graph, home);
250 cost = ctrl->ipc_factor * (real_t)graph->mincut + ctrl->redist_factor * (real_t)totalv;
251
252
253 CleanUpAndExit:
254 gk_free((void **)&solution, (void **)&perm, (void **)&workspace, (void **)&cand, LTERM);
255
256 return cost;
257 }
258
259