1 /* ----------------------------------------------------------------------
2    SPARTA - Stochastic PArallel Rarefied-gas Time-accurate Analyzer
3    http://sparta.sandia.gov
4    Steve Plimpton, sjplimp@sandia.gov, Michael Gallis, magalli@sandia.gov
5    Sandia National Laboratories
6 
7    Copyright (2014) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level SPARTA directory.
13 ------------------------------------------------------------------------- */
14 
15 #include "stdlib.h"
16 #include "string.h"
17 #include "balance_grid.h"
18 #include "update.h"
19 #include "grid.h"
20 #include "surf.h"
21 #include "domain.h"
22 #include "modify.h"
23 #include "comm.h"
24 #include "rcb.h"
25 #include "output.h"
26 #include "dump.h"
27 #include "random_mars.h"
28 #include "random_knuth.h"
29 #include "memory.h"
30 #include "error.h"
31 #include "timer.h"
32 
33 using namespace SPARTA_NS;
34 
35 //#define RCB_DEBUG 1     // un-comment to include RCB proc boxes in image
36 
37 enum{NONE,STRIDE,CLUMP,BLOCK,RANDOM,PROC,BISECTION};
38 enum{XYZ,XZY,YXZ,YZX,ZXY,ZYX};
39 enum{CELL,PARTICLE,TIME};
40 
41 #define ZEROPARTICLE 0.1
42 
43 /* ---------------------------------------------------------------------- */
44 
BalanceGrid(SPARTA * sparta)45 BalanceGrid::BalanceGrid(SPARTA *sparta) : Pointers(sparta) {
46   last = 0.0;
47 }
48 
49 /* ---------------------------------------------------------------------- */
50 
command(int narg,char ** arg,int outflag)51 void BalanceGrid::command(int narg, char **arg, int outflag)
52 {
53   if (!grid->exist)
54     error->all(FLERR,"Cannot balance grid before grid is defined");
55 
56   if (narg < 1) error->all(FLERR,"Illegal balance_grid command");
57 
58   int bstyle,order;
59   int px,py,pz;
60   int rcbwt;
61   int iarg;
62 
63   if (strcmp(arg[0],"none") == 0) {
64     if (narg < 1) error->all(FLERR,"Illegal balance_grid command");
65     bstyle = NONE;
66 
67   } else if (strcmp(arg[0],"stride") == 0) {
68     if (narg < 2) error->all(FLERR,"Illegal balance_grid command");
69     bstyle = STRIDE;
70     if (strcmp(arg[1],"xyz") == 0) order = XYZ;
71     else if (strcmp(arg[1],"xzy") == 0) order = XZY;
72     else if (strcmp(arg[1],"yxz") == 0) order = YXZ;
73     else if (strcmp(arg[1],"yzx") == 0) order = YZX;
74     else if (strcmp(arg[1],"zxy") == 0) order = ZXY;
75     else if (strcmp(arg[1],"zyx") == 0) order = ZYX;
76     else error->all(FLERR,"Illegal balance_grid command");
77     iarg = 1;
78 
79   } else if (strcmp(arg[0],"clump") == 0) {
80     if (narg < 2) error->all(FLERR,"Illegal balance_grid command");
81     bstyle = CLUMP;
82     if (strcmp(arg[1],"xyz") == 0) order = XYZ;
83     else if (strcmp(arg[1],"xzy") == 0) order = XZY;
84     else if (strcmp(arg[1],"yxz") == 0) order = YXZ;
85     else if (strcmp(arg[1],"yzx") == 0) order = YZX;
86     else if (strcmp(arg[1],"zxy") == 0) order = ZXY;
87     else if (strcmp(arg[1],"zyx") == 0) order = ZYX;
88     else error->all(FLERR,"Illegal balance_grid command");
89     iarg = 2;
90 
91   } else if (strcmp(arg[0],"block") == 0) {
92     if (narg < 4) error->all(FLERR,"Illegal balance_grid command");
93     bstyle = BLOCK;
94     if (strcmp(arg[1],"*") == 0) px = 0;
95     else px = atoi(arg[1]);
96     if (strcmp(arg[2],"*") == 0) py = 0;
97     else py = atoi(arg[2]);
98     if (strcmp(arg[3],"*") == 0) pz = 0;
99     else pz = atoi(arg[3]);
100     iarg = 4;
101 
102   } else if (strcmp(arg[0],"random") == 0) {
103     if (narg < 1) error->all(FLERR,"Illegal balance_grid command");
104     bstyle = RANDOM;
105     iarg = 0;
106 
107   } else if (strcmp(arg[0],"proc") == 0) {
108     if (narg < 1) error->all(FLERR,"Illegal balance_grid command");
109     bstyle = PROC;
110     iarg = 0;
111 
112   } else if (strcmp(arg[0],"rcb") == 0) {
113     if (narg < 2) error->all(FLERR,"Illegal balance_grid command");
114     bstyle = BISECTION;
115     if (strcmp(arg[1],"cell") == 0) rcbwt = CELL;
116     else if (strcmp(arg[1],"part") == 0) rcbwt = PARTICLE;
117     else if (strcmp(arg[1],"time") == 0) rcbwt = TIME;
118     else error->all(FLERR,"Illegal balance_grid command");
119     iarg = 2;
120   }
121 
122   // optional args
123 
124   char eligible[4];
125   strcpy(eligible,"xyz");
126   int rcbflip = 0;
127 
128   while (iarg < narg) {
129     if (strcmp(arg[iarg],"axes") == 0) {
130       if (iarg+2 > narg) error->all(FLERR,"Illegal balance_grid command");
131       if (strlen(arg[iarg+1]) > 3)
132         error->all(FLERR,"Illegal balance_grid command");
133       strcpy(eligible,arg[iarg+1]);
134       int xdim = 0;
135       int ydim = 0;
136       int zdim = 0;
137       if (strchr(eligible,'x')) xdim = 1;
138       if (strchr(eligible,'y')) ydim = 1;
139       if (strchr(eligible,'z')) zdim = 1;
140       if (zdim && domain->dimension == 2)
141         error->all(FLERR,"Illegal balance_grid command");
142       if (xdim+ydim+zdim != strlen(eligible))
143         error->all(FLERR,"Illegal balance_grid command");
144       iarg += 2;
145     } else if (strcmp(arg[iarg],"flip") == 0) {
146       if (iarg+2 > narg) error->all(FLERR,"Illegal balance_grid command");
147       if (strcmp(arg[iarg+1],"yes") == 0) rcbflip = 1;
148       else if (strcmp(arg[iarg+1],"no") == 0) rcbflip = 0;
149       else error->all(FLERR,"Illegal balance_grid command");
150       iarg += 2;
151     } else error->all(FLERR,"Illegal balance_grid command");
152   }
153 
154   // error check on methods only allowed for a uniform grid
155 
156   if (bstyle == STRIDE || bstyle == CLUMP || bstyle == BLOCK)
157     if (!grid->uniform)
158       error->all(FLERR,"Invalid balance_grid style for non-uniform grid");
159 
160   // re-assign each of my local child cells to a proc
161   // only assign unsplit and split cells
162   // do not assign sub cells since they migrate with their split cell
163   // set nmigrate = # of cells that will migrate to a new proc
164   // reset proc field in cells for migrating cells
165   // style NONE performs no re-assignment
166 
167   MPI_Barrier(world);
168   double time1 = MPI_Wtime();
169 
170   Grid::ChildCell *cells = grid->cells;
171   Grid::ChildInfo *cinfo = grid->cinfo;
172   int nglocal = grid->nlocal;
173 
174   int nprocs = comm->nprocs;
175   int newproc;
176   int nmigrate = 0;
177 
178   if (bstyle == STRIDE) {
179     cellint idm1,ix,iy,iz,nth;
180 
181     cellint nx = grid->unx;
182     cellint ny = grid->uny;
183     cellint nz = grid->unz;
184 
185     for (int icell = 0; icell < nglocal; icell++) {
186       if (cells[icell].nsplit <= 0) continue;
187       idm1 = cells[icell].id - 1;
188       ix = idm1 % nx;
189       iy = (idm1 / nx) % ny;
190       iz = idm1 / (nx*ny);
191 
192       if (order == XYZ) nth = iz*nx*ny + iy*nx + ix;
193       else if (order == XZY) nth = iy*nx*nz + iz*nx + ix;
194       else if (order == YXZ) nth = iz*ny*nx + ix*ny + iy;
195       else if (order == YZX) nth = ix*ny*nz + iz*ny + iy;
196       else if (order == ZXY) nth = iy*nz*nx + ix*nz + iz;
197       else if (order == ZYX) nth = ix*nz*ny + iy*nz + iz;
198 
199       newproc = nth % nprocs;
200 
201       if (newproc != cells[icell].proc) nmigrate++;
202       cells[icell].proc = newproc;
203     }
204 
205   } else if (bstyle == CLUMP) {
206     cellint idm1,ix,iy,iz,nth;
207 
208     cellint nx = grid->unx;
209     cellint ny = grid->uny;
210     cellint nz = grid->unz;
211     bigint ntotal = (bigint) nx * ny * nz;
212 
213     for (int icell = 0; icell < nglocal; icell++) {
214       if (cells[icell].nsplit <= 0) continue;
215       idm1 = cells[icell].id - 1;
216       ix = idm1 % nx;
217       iy = (idm1 / nx) % ny;
218       iz = idm1 / (nx*ny);
219 
220       if (order == XYZ) nth = iz*nx*ny + iy*nx + ix;
221       else if (order == XZY) nth = iy*nx*nz + iz*nx + ix;
222       else if (order == YXZ) nth = iz*ny*nx + ix*ny + iy;
223       else if (order == YZX) nth = ix*ny*nz + iz*ny + iy;
224       else if (order == ZXY) nth = iy*nz*nx + ix*nz + iz;
225       else if (order == ZYX) nth = ix*nz*ny + iy*nz + iz;
226 
227       newproc = static_cast<int> (1.0*nth/ntotal * nprocs);
228 
229       if (newproc != cells[icell].proc) nmigrate++;
230       cells[icell].proc = newproc;
231     }
232 
233   } else if (bstyle == BLOCK) {
234     cellint idm1,ix,iy,iz;
235     int ipx,ipy,ipz;
236 
237     int nx = grid->unx;
238     int ny = grid->uny;
239     int nz = grid->unz;
240 
241     procs2grid(nx,ny,nz,px,py,pz);
242     if (px*py*pz != nprocs)
243       error->all(FLERR,"Bad grid of processors for balance_grid block");
244 
245     for (int icell = 0; icell < nglocal; icell++) {
246       if (cells[icell].nsplit <= 0) continue;
247       idm1 = cells[icell].id - 1;
248       ix = idm1 % nx;
249       iy = (idm1 / nx) % ny;
250       iz = idm1 / (nx*ny);
251       ipx = ix*px / nx;
252       ipy = iy*py / ny;
253       ipz = iz*pz / nz;
254 
255       newproc = ipz*px*py + ipy*px + ipx;
256 
257       if (newproc != cells[icell].proc) nmigrate++;
258       cells[icell].proc = newproc;
259     }
260 
261   } else if (bstyle == RANDOM) {
262     int newproc;
263     RanKnuth *random = new RanKnuth(update->ranmaster->uniform());
264     double seed = update->ranmaster->uniform();
265     random->reset(seed,comm->me,100);
266 
267     for (int icell = 0; icell < nglocal; icell++) {
268       if (cells[icell].nsplit <= 0) continue;
269       newproc = nprocs * random->uniform();
270       if (newproc != cells[icell].proc) nmigrate++;
271       cells[icell].proc = newproc;
272     }
273 
274     delete random;
275 
276   } else if (bstyle == PROC) {
277     int newproc;
278     RanKnuth *random = new RanKnuth(update->ranmaster->uniform());
279     newproc = nprocs * random->uniform();
280 
281     for (int icell = 0; icell < nglocal; icell++) {
282       if (cells[icell].nsplit <= 0) continue;
283       if (newproc != cells[icell].proc) nmigrate++;
284       cells[icell].proc = newproc;
285       newproc++;
286       if (newproc == nprocs) newproc = 0;
287     }
288 
289     delete random;
290 
291   } else if (bstyle == BISECTION) {
292     RCB *rcb = new RCB(sparta);
293 
294     double **x;
295     memory->create(x,nglocal,3,"balance_grid:x");
296 
297     double *lo,*hi;
298 
299     int nbalance = 0;
300     for (int icell = 0; icell < nglocal; icell++) {
301       if (cells[icell].nsplit <= 0) continue;
302       lo = cells[icell].lo;
303       hi = cells[icell].hi;
304       x[nbalance][0] = 0.5*(lo[0]+hi[0]);
305       x[nbalance][1] = 0.5*(lo[1]+hi[1]);
306       x[nbalance][2] = 0.5*(lo[2]+hi[2]);
307       nbalance++;
308     }
309 
310     double *wt = NULL;
311     if (rcbwt == PARTICLE) {
312       particle->sort();
313       memory->create(wt,nglocal,"balance_grid:wt");
314       int n;
315       int zero = 0;
316       nbalance = 0;
317       for (int icell = 0; icell < nglocal; icell++) {
318         if (cells[icell].nsplit <= 0) continue;
319         n = cinfo[icell].count;
320         if (n) wt[nbalance++] = n;
321         else {
322           wt[nbalance++] = ZEROPARTICLE;
323           zero++;
324         }
325       }
326     } else if (rcbwt == TIME) {
327       memory->create(wt,nglocal,"balance_grid:wt");
328       timer_cell_weights(wt);
329     }
330 
331     rcb->compute(nbalance,x,wt,eligible,rcbflip);
332 
333     // DEBUG info for dump image
334 
335 #ifdef RCB_DEBUG
336 
337     update->rcblo[0] = rcb->lo[0];
338     update->rcblo[1] = rcb->lo[1];
339     update->rcblo[2] = rcb->lo[2];
340     update->rcbhi[0] = rcb->hi[0];
341     update->rcbhi[1] = rcb->hi[1];
342     update->rcbhi[2] = rcb->hi[2];
343 
344 #endif
345 
346     rcb->invert();
347 
348     nbalance = 0;
349     int *sendproc = rcb->sendproc;
350     for (int icell = 0; icell < nglocal; icell++) {
351       if (cells[icell].nsplit <= 0) continue;
352       cells[icell].proc = sendproc[nbalance++];
353     }
354     nmigrate = nbalance - rcb->nkeep;
355 
356     delete rcb;
357     memory->destroy(x);
358     memory->destroy(wt);
359   }
360 
361   // set clumped or not, depending on style
362   // NONE style does not change clumping
363 
364   if (nprocs == 1 || bstyle == CLUMP || bstyle == BLOCK || bstyle == BISECTION)
365     grid->clumped = 1;
366   else if (bstyle != NONE) grid->clumped = 0;
367 
368   MPI_Barrier(world);
369   double time2 = MPI_Wtime();
370 
371   // sort particles
372   // NOTE: not needed again if rcbwt = PARTICLE for bstyle = BISECTION ??
373 
374   particle->sort();
375 
376   MPI_Barrier(world);
377   double time3 = MPI_Wtime();
378 
379   // invoke init() so all grid cell info, including collide & fixes,
380   //   is ready to migrate
381   // for init, do not require surfs be assigned collision models
382   //   this allows balance call early in script, e.g. from ReadRestart
383   // migrate grid cells and their particles to new owners
384   // invoke grid methods to complete grid setup
385 
386   int ghost_previous = grid->exist_ghost;
387 
388   domain->boundary_collision_check = 0;
389   surf->surf_collision_check = 0;
390   sparta->init();
391   domain->boundary_collision_check = 1;
392   surf->surf_collision_check = 1;
393 
394   grid->unset_neighbors();
395   grid->remove_ghosts();
396 
397   comm->migrate_cells(nmigrate);
398   grid->hashfilled = 0;
399 
400   MPI_Barrier(world);
401   double time4 = MPI_Wtime();
402 
403   grid->setup_owned();
404   grid->acquire_ghosts();
405   if (ghost_previous) grid->reset_neighbors();
406   else grid->find_neighbors();
407   comm->reset_neighbors();
408 
409   MPI_Barrier(world);
410   double time5 = MPI_Wtime();
411 
412   // DEBUG
413 
414   /*
415   sprintf(file,"tmp.aft.%d",comm->me);
416   fp = fopen(file,"w");
417 
418   fprintf(fp,"Cells %d %d\n",grid->nlocal,grid->nghost);
419   for (int i = 0; i < grid->nlocal+grid->nghost; i++) {
420     fprintf(fp,"cell %d " CELLINT_FORMAT ": %d : %d %d %d %d %d %d\n",
421            i,grid->cells[i].id,
422            grid->cells[i].nmask,
423            grid->cells[i].neigh[0],
424            grid->cells[i].neigh[1],
425            grid->cells[i].neigh[2],
426            grid->cells[i].neigh[3],
427            grid->cells[i].neigh[4],
428            grid->cells[i].neigh[5]);
429   }
430   fclose(fp);
431   */
432 
433   // stats on balance operation
434   // only print if outflag = 1
435   // some callers suppress output, e.g. ReadRestart
436 
437   bigint count = nmigrate;
438   bigint nmigrate_all;
439   MPI_Allreduce(&count,&nmigrate_all,1,MPI_SPARTA_BIGINT,MPI_SUM,world);
440   double time_total = time5-time1;
441 
442   if (comm->me == 0 && outflag) {
443     if (screen) {
444       fprintf(screen,"Balance grid migrated " BIGINT_FORMAT " cells\n",
445               nmigrate_all);
446       fprintf(screen,"  CPU time = %g secs\n",time_total);
447       fprintf(screen,"  reassign/sort/migrate/ghost percent = %g %g %g %g\n",
448               100.0*(time2-time1)/time_total,100.0*(time3-time2)/time_total,
449               100.0*(time4-time3)/time_total,100.0*(time5-time4)/time_total);
450     }
451     if (logfile) {
452       fprintf(logfile,"Balance grid migrated " BIGINT_FORMAT " cells\n",
453               nmigrate_all);
454       fprintf(logfile,"  CPU time = %g secs\n",time_total);
455       fprintf(logfile,"  reassign/sort/migrate/ghost percent = %g %g %g %g\n",
456               100.0*(time2-time1)/time_total,100.0*(time3-time2)/time_total,
457               100.0*(time4-time3)/time_total,100.0*(time5-time4)/time_total);
458     }
459   }
460 }
461 
462 /* ----------------------------------------------------------------------
463    assign nprocs to 3d grid so as to minimize surface area
464    area = surface area of each of 3 faces of simulation box
465 ------------------------------------------------------------------------- */
466 
procs2grid(int nx,int ny,int nz,int & px,int & py,int & pz)467 void BalanceGrid::procs2grid(int nx, int ny, int nz,
468                              int &px, int &py, int &pz)
469 {
470   int upx = px;
471   int upy = py;
472   int upz = pz;
473 
474   int nprocs = comm->nprocs;
475 
476   // all 3 proc counts are specified
477 
478   if (px && py && pz) return;
479 
480   // 2 out of 3 proc counts are specified
481 
482   if (py > 0 && pz > 0) {
483     px = nprocs/(py*pz);
484     return;
485   } else if (px > 0 && pz > 0) {
486     py = nprocs/(px*pz);
487     return;
488   } else if (px > 0 && py > 0) {
489     pz = nprocs/(px*py);
490     return;
491   }
492 
493   // determine cross-sectional areas
494   // area[0] = xy, area[1] = xz, area[2] = yz
495 
496   double area[3];
497   area[0] = nx*ny;
498   area[1] = nx*nz;
499   area[2] = ny*nz;
500 
501   double bestsurf = 2.0 * (area[0]+area[1]+area[2]);
502 
503   // loop thru all possible factorizations of nprocs
504   // only consider valid cases that match procgrid settings
505   // surf = surface area of a proc sub-domain
506 
507   int ipx,ipy,ipz,valid;
508   double surf;
509 
510   ipx = 1;
511   while (ipx <= nprocs) {
512     valid = 1;
513     if (upx && ipx != upx) valid = 0;
514     if (nprocs % ipx) valid = 0;
515     if (!valid) {
516       ipx++;
517       continue;
518     }
519 
520     ipy = 1;
521     while (ipy <= nprocs/ipx) {
522       valid = 1;
523       if (upy && ipy != upy) valid = 0;
524       if ((nprocs/ipx) % ipy) valid = 0;
525       if (!valid) {
526 	ipy++;
527 	continue;
528       }
529 
530       ipz = nprocs/ipx/ipy;
531       valid = 1;
532       if (upz && ipz != upz) valid = 0;
533       if (domain->dimension == 2 && ipz != 1) valid = 0;
534       if (!valid) {
535 	ipy++;
536 	continue;
537       }
538 
539       surf = area[0]/ipx/ipy + area[1]/ipx/ipz + area[2]/ipy/ipz;
540       if (surf < bestsurf) {
541 	bestsurf = surf;
542 	px = ipx;
543 	py = ipy;
544 	pz = ipz;
545       }
546       ipy++;
547     }
548 
549     ipx++;
550   }
551 }
552 
553 /* -------------------------------------------------------------------- */
554 
timer_cell_weights(double * weight)555 void BalanceGrid::timer_cell_weights(double *weight)
556 {
557   // cost = CPU time for relevant timers since last invocation
558 
559   double cost = -last;
560   cost += timer->array[TIME_MOVE];
561   cost += timer->array[TIME_SORT];
562   cost += timer->array[TIME_COLLIDE];
563   cost += timer->array[TIME_MODIFY];
564 
565   // localwt = weight assigned to each owned grid cell
566   // just return if no time yet tallied
567 
568   double maxcost;
569   MPI_Allreduce(&cost,&maxcost,1,MPI_DOUBLE,MPI_MAX,world);
570   if (maxcost <= 0.0) {
571     memory->destroy(weight);
572     weight = NULL;
573     return;
574   }
575 
576   Grid::ChildCell *cells = grid->cells;
577   Grid::ChildInfo *cinfo = grid->cinfo;
578   int nglocal = grid->nlocal;
579 
580   double localwt_total = 0.0;
581   if (nglocal) localwt_total = cost/nglocal;
582   if (nglocal && localwt_total <= 0.0) error->one(FLERR,"Balance weight <= 0.0");
583 
584   if (!particle->sorted) particle->sort();
585   double wttotal = 0;
586   int nbalance = 0;
587   double* localwt;
588   memory->create(localwt,nglocal,"imbalance_time:localwt");
589   for (int icell = 0; icell < nglocal; icell++) {
590     localwt[icell] = 0.0;
591     if (cells[icell].nsplit <= 0) continue;
592     int n = cinfo[icell].count;
593     if (n) localwt[nbalance++] = n;
594     else {
595       localwt[nbalance++] = ZEROPARTICLE;
596     }
597     wttotal += localwt[nbalance-1];
598   }
599 
600   for (int icell = 0; icell < nglocal; icell++)
601     weight[icell] = cost*localwt[icell]/wttotal;
602 
603   memory->destroy(localwt);
604 
605   // last = time up to this point
606 
607   last += cost;
608 }
609