1 /* ----------------------------------------------------------------------
2 SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3 http://sparta.sandia.gov
4 Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5 Sandia National Laboratories
6
7 Copyright (2014) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14
15 #include "stdlib.h"
16 #include "string.h"
17 #include "balance_grid.h"
18 #include "update.h"
19 #include "grid.h"
20 #include "surf.h"
21 #include "domain.h"
22 #include "modify.h"
23 #include "comm.h"
24 #include "rcb.h"
25 #include "output.h"
26 #include "dump.h"
27 #include "random_mars.h"
28 #include "random_knuth.h"
29 #include "memory.h"
30 #include "error.h"
31 #include "timer.h"
32
33 using namespace SPARTA_NS;
34
35 //#define RCB_DEBUG 1 // un-comment to include RCB proc boxes in image
36
37 enum{NONE,STRIDE,CLUMP,BLOCK,RANDOM,PROC,BISECTION};
38 enum{XYZ,XZY,YXZ,YZX,ZXY,ZYX};
39 enum{CELL,PARTICLE,TIME};
40
41 #define ZEROPARTICLE 0.1
42
43 /* ---------------------------------------------------------------------- */
44
BalanceGrid(SPARTA * sparta)45 BalanceGrid::BalanceGrid(SPARTA *sparta) : Pointers(sparta) {
46 last = 0.0;
47 }
48
49 /* ---------------------------------------------------------------------- */
50
command(int narg,char ** arg,int outflag)51 void BalanceGrid::command(int narg, char **arg, int outflag)
52 {
53 if (!grid->exist)
54 error->all(FLERR,"Cannot balance grid before grid is defined");
55
56 if (narg < 1) error->all(FLERR,"Illegal balance_grid command");
57
58 int bstyle,order;
59 int px,py,pz;
60 int rcbwt;
61 int iarg;
62
63 if (strcmp(arg[0],"none") == 0) {
64 if (narg < 1) error->all(FLERR,"Illegal balance_grid command");
65 bstyle = NONE;
66
67 } else if (strcmp(arg[0],"stride") == 0) {
68 if (narg < 2) error->all(FLERR,"Illegal balance_grid command");
69 bstyle = STRIDE;
70 if (strcmp(arg[1],"xyz") == 0) order = XYZ;
71 else if (strcmp(arg[1],"xzy") == 0) order = XZY;
72 else if (strcmp(arg[1],"yxz") == 0) order = YXZ;
73 else if (strcmp(arg[1],"yzx") == 0) order = YZX;
74 else if (strcmp(arg[1],"zxy") == 0) order = ZXY;
75 else if (strcmp(arg[1],"zyx") == 0) order = ZYX;
76 else error->all(FLERR,"Illegal balance_grid command");
77 iarg = 1;
78
79 } else if (strcmp(arg[0],"clump") == 0) {
80 if (narg < 2) error->all(FLERR,"Illegal balance_grid command");
81 bstyle = CLUMP;
82 if (strcmp(arg[1],"xyz") == 0) order = XYZ;
83 else if (strcmp(arg[1],"xzy") == 0) order = XZY;
84 else if (strcmp(arg[1],"yxz") == 0) order = YXZ;
85 else if (strcmp(arg[1],"yzx") == 0) order = YZX;
86 else if (strcmp(arg[1],"zxy") == 0) order = ZXY;
87 else if (strcmp(arg[1],"zyx") == 0) order = ZYX;
88 else error->all(FLERR,"Illegal balance_grid command");
89 iarg = 2;
90
91 } else if (strcmp(arg[0],"block") == 0) {
92 if (narg < 4) error->all(FLERR,"Illegal balance_grid command");
93 bstyle = BLOCK;
94 if (strcmp(arg[1],"*") == 0) px = 0;
95 else px = atoi(arg[1]);
96 if (strcmp(arg[2],"*") == 0) py = 0;
97 else py = atoi(arg[2]);
98 if (strcmp(arg[3],"*") == 0) pz = 0;
99 else pz = atoi(arg[3]);
100 iarg = 4;
101
102 } else if (strcmp(arg[0],"random") == 0) {
103 if (narg < 1) error->all(FLERR,"Illegal balance_grid command");
104 bstyle = RANDOM;
105 iarg = 0;
106
107 } else if (strcmp(arg[0],"proc") == 0) {
108 if (narg < 1) error->all(FLERR,"Illegal balance_grid command");
109 bstyle = PROC;
110 iarg = 0;
111
112 } else if (strcmp(arg[0],"rcb") == 0) {
113 if (narg < 2) error->all(FLERR,"Illegal balance_grid command");
114 bstyle = BISECTION;
115 if (strcmp(arg[1],"cell") == 0) rcbwt = CELL;
116 else if (strcmp(arg[1],"part") == 0) rcbwt = PARTICLE;
117 else if (strcmp(arg[1],"time") == 0) rcbwt = TIME;
118 else error->all(FLERR,"Illegal balance_grid command");
119 iarg = 2;
120 }
121
122 // optional args
123
124 char eligible[4];
125 strcpy(eligible,"xyz");
126 int rcbflip = 0;
127
128 while (iarg < narg) {
129 if (strcmp(arg[iarg],"axes") == 0) {
130 if (iarg+2 > narg) error->all(FLERR,"Illegal balance_grid command");
131 if (strlen(arg[iarg+1]) > 3)
132 error->all(FLERR,"Illegal balance_grid command");
133 strcpy(eligible,arg[iarg+1]);
134 int xdim = 0;
135 int ydim = 0;
136 int zdim = 0;
137 if (strchr(eligible,'x')) xdim = 1;
138 if (strchr(eligible,'y')) ydim = 1;
139 if (strchr(eligible,'z')) zdim = 1;
140 if (zdim && domain->dimension == 2)
141 error->all(FLERR,"Illegal balance_grid command");
142 if (xdim+ydim+zdim != strlen(eligible))
143 error->all(FLERR,"Illegal balance_grid command");
144 iarg += 2;
145 } else if (strcmp(arg[iarg],"flip") == 0) {
146 if (iarg+2 > narg) error->all(FLERR,"Illegal balance_grid command");
147 if (strcmp(arg[iarg+1],"yes") == 0) rcbflip = 1;
148 else if (strcmp(arg[iarg+1],"no") == 0) rcbflip = 0;
149 else error->all(FLERR,"Illegal balance_grid command");
150 iarg += 2;
151 } else error->all(FLERR,"Illegal balance_grid command");
152 }
153
154 // error check on methods only allowed for a uniform grid
155
156 if (bstyle == STRIDE || bstyle == CLUMP || bstyle == BLOCK)
157 if (!grid->uniform)
158 error->all(FLERR,"Invalid balance_grid style for non-uniform grid");
159
160 // re-assign each of my local child cells to a proc
161 // only assign unsplit and split cells
162 // do not assign sub cells since they migrate with their split cell
163 // set nmigrate = # of cells that will migrate to a new proc
164 // reset proc field in cells for migrating cells
165 // style NONE performs no re-assignment
166
167 MPI_Barrier(world);
168 double time1 = MPI_Wtime();
169
170 Grid::ChildCell *cells = grid->cells;
171 Grid::ChildInfo *cinfo = grid->cinfo;
172 int nglocal = grid->nlocal;
173
174 int nprocs = comm->nprocs;
175 int newproc;
176 int nmigrate = 0;
177
178 if (bstyle == STRIDE) {
179 cellint idm1,ix,iy,iz,nth;
180
181 cellint nx = grid->unx;
182 cellint ny = grid->uny;
183 cellint nz = grid->unz;
184
185 for (int icell = 0; icell < nglocal; icell++) {
186 if (cells[icell].nsplit <= 0) continue;
187 idm1 = cells[icell].id - 1;
188 ix = idm1 % nx;
189 iy = (idm1 / nx) % ny;
190 iz = idm1 / (nx*ny);
191
192 if (order == XYZ) nth = iz*nx*ny + iy*nx + ix;
193 else if (order == XZY) nth = iy*nx*nz + iz*nx + ix;
194 else if (order == YXZ) nth = iz*ny*nx + ix*ny + iy;
195 else if (order == YZX) nth = ix*ny*nz + iz*ny + iy;
196 else if (order == ZXY) nth = iy*nz*nx + ix*nz + iz;
197 else if (order == ZYX) nth = ix*nz*ny + iy*nz + iz;
198
199 newproc = nth % nprocs;
200
201 if (newproc != cells[icell].proc) nmigrate++;
202 cells[icell].proc = newproc;
203 }
204
205 } else if (bstyle == CLUMP) {
206 cellint idm1,ix,iy,iz,nth;
207
208 cellint nx = grid->unx;
209 cellint ny = grid->uny;
210 cellint nz = grid->unz;
211 bigint ntotal = (bigint) nx * ny * nz;
212
213 for (int icell = 0; icell < nglocal; icell++) {
214 if (cells[icell].nsplit <= 0) continue;
215 idm1 = cells[icell].id - 1;
216 ix = idm1 % nx;
217 iy = (idm1 / nx) % ny;
218 iz = idm1 / (nx*ny);
219
220 if (order == XYZ) nth = iz*nx*ny + iy*nx + ix;
221 else if (order == XZY) nth = iy*nx*nz + iz*nx + ix;
222 else if (order == YXZ) nth = iz*ny*nx + ix*ny + iy;
223 else if (order == YZX) nth = ix*ny*nz + iz*ny + iy;
224 else if (order == ZXY) nth = iy*nz*nx + ix*nz + iz;
225 else if (order == ZYX) nth = ix*nz*ny + iy*nz + iz;
226
227 newproc = static_cast<int> (1.0*nth/ntotal * nprocs);
228
229 if (newproc != cells[icell].proc) nmigrate++;
230 cells[icell].proc = newproc;
231 }
232
233 } else if (bstyle == BLOCK) {
234 cellint idm1,ix,iy,iz;
235 int ipx,ipy,ipz;
236
237 int nx = grid->unx;
238 int ny = grid->uny;
239 int nz = grid->unz;
240
241 procs2grid(nx,ny,nz,px,py,pz);
242 if (px*py*pz != nprocs)
243 error->all(FLERR,"Bad grid of processors for balance_grid block");
244
245 for (int icell = 0; icell < nglocal; icell++) {
246 if (cells[icell].nsplit <= 0) continue;
247 idm1 = cells[icell].id - 1;
248 ix = idm1 % nx;
249 iy = (idm1 / nx) % ny;
250 iz = idm1 / (nx*ny);
251 ipx = ix*px / nx;
252 ipy = iy*py / ny;
253 ipz = iz*pz / nz;
254
255 newproc = ipz*px*py + ipy*px + ipx;
256
257 if (newproc != cells[icell].proc) nmigrate++;
258 cells[icell].proc = newproc;
259 }
260
261 } else if (bstyle == RANDOM) {
262 int newproc;
263 RanKnuth *random = new RanKnuth(update->ranmaster->uniform());
264 double seed = update->ranmaster->uniform();
265 random->reset(seed,comm->me,100);
266
267 for (int icell = 0; icell < nglocal; icell++) {
268 if (cells[icell].nsplit <= 0) continue;
269 newproc = nprocs * random->uniform();
270 if (newproc != cells[icell].proc) nmigrate++;
271 cells[icell].proc = newproc;
272 }
273
274 delete random;
275
276 } else if (bstyle == PROC) {
277 int newproc;
278 RanKnuth *random = new RanKnuth(update->ranmaster->uniform());
279 newproc = nprocs * random->uniform();
280
281 for (int icell = 0; icell < nglocal; icell++) {
282 if (cells[icell].nsplit <= 0) continue;
283 if (newproc != cells[icell].proc) nmigrate++;
284 cells[icell].proc = newproc;
285 newproc++;
286 if (newproc == nprocs) newproc = 0;
287 }
288
289 delete random;
290
291 } else if (bstyle == BISECTION) {
292 RCB *rcb = new RCB(sparta);
293
294 double **x;
295 memory->create(x,nglocal,3,"balance_grid:x");
296
297 double *lo,*hi;
298
299 int nbalance = 0;
300 for (int icell = 0; icell < nglocal; icell++) {
301 if (cells[icell].nsplit <= 0) continue;
302 lo = cells[icell].lo;
303 hi = cells[icell].hi;
304 x[nbalance][0] = 0.5*(lo[0]+hi[0]);
305 x[nbalance][1] = 0.5*(lo[1]+hi[1]);
306 x[nbalance][2] = 0.5*(lo[2]+hi[2]);
307 nbalance++;
308 }
309
310 double *wt = NULL;
311 if (rcbwt == PARTICLE) {
312 particle->sort();
313 memory->create(wt,nglocal,"balance_grid:wt");
314 int n;
315 int zero = 0;
316 nbalance = 0;
317 for (int icell = 0; icell < nglocal; icell++) {
318 if (cells[icell].nsplit <= 0) continue;
319 n = cinfo[icell].count;
320 if (n) wt[nbalance++] = n;
321 else {
322 wt[nbalance++] = ZEROPARTICLE;
323 zero++;
324 }
325 }
326 } else if (rcbwt == TIME) {
327 memory->create(wt,nglocal,"balance_grid:wt");
328 timer_cell_weights(wt);
329 }
330
331 rcb->compute(nbalance,x,wt,eligible,rcbflip);
332
333 // DEBUG info for dump image
334
335 #ifdef RCB_DEBUG
336
337 update->rcblo[0] = rcb->lo[0];
338 update->rcblo[1] = rcb->lo[1];
339 update->rcblo[2] = rcb->lo[2];
340 update->rcbhi[0] = rcb->hi[0];
341 update->rcbhi[1] = rcb->hi[1];
342 update->rcbhi[2] = rcb->hi[2];
343
344 #endif
345
346 rcb->invert();
347
348 nbalance = 0;
349 int *sendproc = rcb->sendproc;
350 for (int icell = 0; icell < nglocal; icell++) {
351 if (cells[icell].nsplit <= 0) continue;
352 cells[icell].proc = sendproc[nbalance++];
353 }
354 nmigrate = nbalance - rcb->nkeep;
355
356 delete rcb;
357 memory->destroy(x);
358 memory->destroy(wt);
359 }
360
361 // set clumped or not, depending on style
362 // NONE style does not change clumping
363
364 if (nprocs == 1 || bstyle == CLUMP || bstyle == BLOCK || bstyle == BISECTION)
365 grid->clumped = 1;
366 else if (bstyle != NONE) grid->clumped = 0;
367
368 MPI_Barrier(world);
369 double time2 = MPI_Wtime();
370
371 // sort particles
372 // NOTE: not needed again if rcbwt = PARTICLE for bstyle = BISECTION ??
373
374 particle->sort();
375
376 MPI_Barrier(world);
377 double time3 = MPI_Wtime();
378
379 // invoke init() so all grid cell info, including collide & fixes,
380 // is ready to migrate
381 // for init, do not require surfs be assigned collision models
382 // this allows balance call early in script, e.g. from ReadRestart
383 // migrate grid cells and their particles to new owners
384 // invoke grid methods to complete grid setup
385
386 int ghost_previous = grid->exist_ghost;
387
388 domain->boundary_collision_check = 0;
389 surf->surf_collision_check = 0;
390 sparta->init();
391 domain->boundary_collision_check = 1;
392 surf->surf_collision_check = 1;
393
394 grid->unset_neighbors();
395 grid->remove_ghosts();
396
397 comm->migrate_cells(nmigrate);
398 grid->hashfilled = 0;
399
400 MPI_Barrier(world);
401 double time4 = MPI_Wtime();
402
403 grid->setup_owned();
404 grid->acquire_ghosts();
405 if (ghost_previous) grid->reset_neighbors();
406 else grid->find_neighbors();
407 comm->reset_neighbors();
408
409 MPI_Barrier(world);
410 double time5 = MPI_Wtime();
411
412 // DEBUG
413
414 /*
415 sprintf(file,"tmp.aft.%d",comm->me);
416 fp = fopen(file,"w");
417
418 fprintf(fp,"Cells %d %d\n",grid->nlocal,grid->nghost);
419 for (int i = 0; i < grid->nlocal+grid->nghost; i++) {
420 fprintf(fp,"cell %d " CELLINT_FORMAT ": %d : %d %d %d %d %d %d\n",
421 i,grid->cells[i].id,
422 grid->cells[i].nmask,
423 grid->cells[i].neigh[0],
424 grid->cells[i].neigh[1],
425 grid->cells[i].neigh[2],
426 grid->cells[i].neigh[3],
427 grid->cells[i].neigh[4],
428 grid->cells[i].neigh[5]);
429 }
430 fclose(fp);
431 */
432
433 // stats on balance operation
434 // only print if outflag = 1
435 // some callers suppress output, e.g. ReadRestart
436
437 bigint count = nmigrate;
438 bigint nmigrate_all;
439 MPI_Allreduce(&count,&nmigrate_all,1,MPI_SPARTA_BIGINT,MPI_SUM,world);
440 double time_total = time5-time1;
441
442 if (comm->me == 0 && outflag) {
443 if (screen) {
444 fprintf(screen,"Balance grid migrated " BIGINT_FORMAT " cells\n",
445 nmigrate_all);
446 fprintf(screen," CPU time = %g secs\n",time_total);
447 fprintf(screen," reassign/sort/migrate/ghost percent = %g %g %g %g\n",
448 100.0*(time2-time1)/time_total,100.0*(time3-time2)/time_total,
449 100.0*(time4-time3)/time_total,100.0*(time5-time4)/time_total);
450 }
451 if (logfile) {
452 fprintf(logfile,"Balance grid migrated " BIGINT_FORMAT " cells\n",
453 nmigrate_all);
454 fprintf(logfile," CPU time = %g secs\n",time_total);
455 fprintf(logfile," reassign/sort/migrate/ghost percent = %g %g %g %g\n",
456 100.0*(time2-time1)/time_total,100.0*(time3-time2)/time_total,
457 100.0*(time4-time3)/time_total,100.0*(time5-time4)/time_total);
458 }
459 }
460 }
461
462 /* ----------------------------------------------------------------------
463 assign nprocs to 3d grid so as to minimize surface area
464 area = surface area of each of 3 faces of simulation box
465 ------------------------------------------------------------------------- */
466
procs2grid(int nx,int ny,int nz,int & px,int & py,int & pz)467 void BalanceGrid::procs2grid(int nx, int ny, int nz,
468 int &px, int &py, int &pz)
469 {
470 int upx = px;
471 int upy = py;
472 int upz = pz;
473
474 int nprocs = comm->nprocs;
475
476 // all 3 proc counts are specified
477
478 if (px && py && pz) return;
479
480 // 2 out of 3 proc counts are specified
481
482 if (py > 0 && pz > 0) {
483 px = nprocs/(py*pz);
484 return;
485 } else if (px > 0 && pz > 0) {
486 py = nprocs/(px*pz);
487 return;
488 } else if (px > 0 && py > 0) {
489 pz = nprocs/(px*py);
490 return;
491 }
492
493 // determine cross-sectional areas
494 // area[0] = xy, area[1] = xz, area[2] = yz
495
496 double area[3];
497 area[0] = nx*ny;
498 area[1] = nx*nz;
499 area[2] = ny*nz;
500
501 double bestsurf = 2.0 * (area[0]+area[1]+area[2]);
502
503 // loop thru all possible factorizations of nprocs
504 // only consider valid cases that match procgrid settings
505 // surf = surface area of a proc sub-domain
506
507 int ipx,ipy,ipz,valid;
508 double surf;
509
510 ipx = 1;
511 while (ipx <= nprocs) {
512 valid = 1;
513 if (upx && ipx != upx) valid = 0;
514 if (nprocs % ipx) valid = 0;
515 if (!valid) {
516 ipx++;
517 continue;
518 }
519
520 ipy = 1;
521 while (ipy <= nprocs/ipx) {
522 valid = 1;
523 if (upy && ipy != upy) valid = 0;
524 if ((nprocs/ipx) % ipy) valid = 0;
525 if (!valid) {
526 ipy++;
527 continue;
528 }
529
530 ipz = nprocs/ipx/ipy;
531 valid = 1;
532 if (upz && ipz != upz) valid = 0;
533 if (domain->dimension == 2 && ipz != 1) valid = 0;
534 if (!valid) {
535 ipy++;
536 continue;
537 }
538
539 surf = area[0]/ipx/ipy + area[1]/ipx/ipz + area[2]/ipy/ipz;
540 if (surf < bestsurf) {
541 bestsurf = surf;
542 px = ipx;
543 py = ipy;
544 pz = ipz;
545 }
546 ipy++;
547 }
548
549 ipx++;
550 }
551 }
552
553 /* -------------------------------------------------------------------- */
554
timer_cell_weights(double * weight)555 void BalanceGrid::timer_cell_weights(double *weight)
556 {
557 // cost = CPU time for relevant timers since last invocation
558
559 double cost = -last;
560 cost += timer->array[TIME_MOVE];
561 cost += timer->array[TIME_SORT];
562 cost += timer->array[TIME_COLLIDE];
563 cost += timer->array[TIME_MODIFY];
564
565 // localwt = weight assigned to each owned grid cell
566 // just return if no time yet tallied
567
568 double maxcost;
569 MPI_Allreduce(&cost,&maxcost,1,MPI_DOUBLE,MPI_MAX,world);
570 if (maxcost <= 0.0) {
571 memory->destroy(weight);
572 weight = NULL;
573 return;
574 }
575
576 Grid::ChildCell *cells = grid->cells;
577 Grid::ChildInfo *cinfo = grid->cinfo;
578 int nglocal = grid->nlocal;
579
580 double localwt_total = 0.0;
581 if (nglocal) localwt_total = cost/nglocal;
582 if (nglocal && localwt_total <= 0.0) error->one(FLERR,"Balance weight <= 0.0");
583
584 if (!particle->sorted) particle->sort();
585 double wttotal = 0;
586 int nbalance = 0;
587 double* localwt;
588 memory->create(localwt,nglocal,"imbalance_time:localwt");
589 for (int icell = 0; icell < nglocal; icell++) {
590 localwt[icell] = 0.0;
591 if (cells[icell].nsplit <= 0) continue;
592 int n = cinfo[icell].count;
593 if (n) localwt[nbalance++] = n;
594 else {
595 localwt[nbalance++] = ZEROPARTICLE;
596 }
597 wttotal += localwt[nbalance-1];
598 }
599
600 for (int icell = 0; icell < nglocal; icell++)
601 weight[icell] = cost*localwt[icell]/wttotal;
602
603 memory->destroy(localwt);
604
605 // last = time up to this point
606
607 last += cost;
608 }
609