1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing authors, for weighted balancing:
17      Axel Kohlmeyer (Temple U), Iain Bethune (EPCC)
18 ------------------------------------------------------------------------- */
19 
20 // #define BALANCE_DEBUG 1
21 
22 #include "balance.h"
23 
24 #include "update.h"
25 #include "atom.h"
26 #include "neighbor.h"
27 #include "comm.h"
28 #include "domain.h"
29 #include "fix_store.h"
30 #include "imbalance.h"
31 #include "imbalance_group.h"
32 #include "imbalance_neigh.h"
33 #include "imbalance_store.h"
34 #include "imbalance_time.h"
35 #include "imbalance_var.h"
36 #include "irregular.h"
37 #include "memory.h"
38 #include "modify.h"
39 #include "rcb.h"
40 #include "error.h"
41 
42 #include <cmath>
43 #include <cstring>
44 
45 using namespace LAMMPS_NS;
46 
47 double EPSNEIGH = 1.0e-3;
48 
49 enum{XYZ,SHIFT,BISECTION};
50 enum{NONE,UNIFORM,USER};
51 enum{X,Y,Z};
52 
53 /* ---------------------------------------------------------------------- */
54 
Balance(LAMMPS * lmp)55 Balance::Balance(LAMMPS *lmp) : Command(lmp)
56 {
57   MPI_Comm_rank(world,&me);
58   MPI_Comm_size(world,&nprocs);
59 
60   user_xsplit = user_ysplit = user_zsplit = nullptr;
61   shift_allocate = 0;
62   proccost = allproccost = nullptr;
63 
64   rcb = nullptr;
65 
66   nimbalance = 0;
67   imbalances = nullptr;
68   fixstore = nullptr;
69 
70   fp = nullptr;
71   firststep = 1;
72 }
73 
74 /* ---------------------------------------------------------------------- */
75 
~Balance()76 Balance::~Balance()
77 {
78   memory->destroy(proccost);
79   memory->destroy(allproccost);
80 
81   delete [] user_xsplit;
82   delete [] user_ysplit;
83   delete [] user_zsplit;
84 
85   if (shift_allocate) {
86     delete [] bdim;
87     delete [] onecost;
88     delete [] allcost;
89     delete [] sum;
90     delete [] target;
91     delete [] lo;
92     delete [] hi;
93     delete [] losum;
94     delete [] hisum;
95   }
96 
97   delete rcb;
98 
99   for (int i = 0; i < nimbalance; i++) delete imbalances[i];
100   delete [] imbalances;
101 
102   // check nfix in case all fixes have already been deleted
103 
104   if (fixstore && modify->nfix) modify->delete_fix(fixstore->id);
105   fixstore = nullptr;
106 
107   if (fp) fclose(fp);
108 }
109 
110 /* ----------------------------------------------------------------------
111    called as balance command in input script
112 ------------------------------------------------------------------------- */
113 
command(int narg,char ** arg)114 void Balance::command(int narg, char **arg)
115 {
116   if (domain->box_exist == 0)
117     error->all(FLERR,"Balance command before simulation box is defined");
118 
119   if (me == 0) utils::logmesg(lmp,"Balancing ...\n");
120 
121   // parse required arguments
122 
123   if (narg < 2) error->all(FLERR,"Illegal balance command");
124 
125   thresh = utils::numeric(FLERR,arg[0],false,lmp);
126 
127   int dimension = domain->dimension;
128   int *procgrid = comm->procgrid;
129   style = -1;
130   xflag = yflag = zflag = NONE;
131 
132   int iarg = 1;
133   while (iarg < narg) {
134     if (strcmp(arg[iarg],"x") == 0) {
135       if (style != -1 && style != XYZ)
136         error->all(FLERR,"Illegal balance command");
137       style = XYZ;
138       if (strcmp(arg[iarg+1],"uniform") == 0) {
139         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
140         xflag = UNIFORM;
141         iarg += 2;
142       } else {
143         if (1 + procgrid[0]-1 > narg)
144           error->all(FLERR,"Illegal balance command");
145         xflag = USER;
146         delete [] user_xsplit;
147         user_xsplit = new double[procgrid[0]+1];
148         user_xsplit[0] = 0.0;
149         iarg++;
150         for (int i = 1; i < procgrid[0]; i++)
151           user_xsplit[i] = utils::numeric(FLERR,arg[iarg++],false,lmp);
152         user_xsplit[procgrid[0]] = 1.0;
153       }
154     } else if (strcmp(arg[iarg],"y") == 0) {
155       if (style != -1 && style != XYZ)
156         error->all(FLERR,"Illegal balance command");
157       style = XYZ;
158       if (strcmp(arg[iarg+1],"uniform") == 0) {
159         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
160         yflag = UNIFORM;
161         iarg += 2;
162       } else {
163         if (1 + procgrid[1]-1 > narg)
164           error->all(FLERR,"Illegal balance command");
165         yflag = USER;
166         delete [] user_ysplit;
167         user_ysplit = new double[procgrid[1]+1];
168         user_ysplit[0] = 0.0;
169         iarg++;
170         for (int i = 1; i < procgrid[1]; i++)
171           user_ysplit[i] = utils::numeric(FLERR,arg[iarg++],false,lmp);
172         user_ysplit[procgrid[1]] = 1.0;
173       }
174     } else if (strcmp(arg[iarg],"z") == 0) {
175       if (style != -1 && style != XYZ)
176         error->all(FLERR,"Illegal balance command");
177       style = XYZ;
178       if (strcmp(arg[iarg+1],"uniform") == 0) {
179         if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
180         zflag = UNIFORM;
181         iarg += 2;
182       } else {
183         if (1 + procgrid[2]-1 > narg)
184           error->all(FLERR,"Illegal balance command");
185         zflag = USER;
186         delete [] user_zsplit;
187         user_zsplit = new double[procgrid[2]+1];
188         user_zsplit[0] = 0.0;
189         iarg++;
190         for (int i = 1; i < procgrid[2]; i++)
191           user_zsplit[i] = utils::numeric(FLERR,arg[iarg++],false,lmp);
192         user_zsplit[procgrid[2]] = 1.0;
193       }
194 
195     } else if (strcmp(arg[iarg],"shift") == 0) {
196       if (style != -1) error->all(FLERR,"Illegal balance command");
197       if (iarg+4 > narg) error->all(FLERR,"Illegal balance command");
198       style = SHIFT;
199       if (strlen(arg[iarg+1]) > BSTR_SIZE) error->all(FLERR,"Illegal balance command");
200       strncpy(bstr,arg[iarg+1],BSTR_SIZE+1);
201       nitermax = utils::inumeric(FLERR,arg[iarg+2],false,lmp);
202       if (nitermax <= 0) error->all(FLERR,"Illegal balance command");
203       stopthresh = utils::numeric(FLERR,arg[iarg+3],false,lmp);
204       if (stopthresh < 1.0) error->all(FLERR,"Illegal balance command");
205       iarg += 4;
206 
207     } else if (strcmp(arg[iarg],"rcb") == 0) {
208       if (style != -1) error->all(FLERR,"Illegal balance command");
209       style = BISECTION;
210       iarg++;
211 
212     } else break;
213   }
214 
215   // error checks
216 
217   if (style == XYZ) {
218     if (zflag != NONE  && dimension == 2)
219       error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
220 
221     if (xflag == USER)
222       for (int i = 1; i <= procgrid[0]; i++)
223         if (user_xsplit[i-1] >= user_xsplit[i])
224           error->all(FLERR,"Illegal balance command");
225     if (yflag == USER)
226       for (int i = 1; i <= procgrid[1]; i++)
227         if (user_ysplit[i-1] >= user_ysplit[i])
228           error->all(FLERR,"Illegal balance command");
229     if (zflag == USER)
230       for (int i = 1; i <= procgrid[2]; i++)
231         if (user_zsplit[i-1] >= user_zsplit[i])
232           error->all(FLERR,"Illegal balance command");
233   }
234 
235   if (style == SHIFT) {
236     const int blen=strlen(bstr);
237     for (int i = 0; i < blen; i++) {
238       if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
239         error->all(FLERR,"Balance shift string is invalid");
240       if (bstr[i] == 'z' && dimension == 2)
241         error->all(FLERR,"Balance shift string is invalid");
242       for (int j = i+1; j < blen; j++)
243         if (bstr[i] == bstr[j])
244           error->all(FLERR,"Balance shift string is invalid");
245     }
246   }
247 
248   if (style == BISECTION && comm->style == 0)
249     error->all(FLERR,"Balance rcb cannot be used with comm_style brick");
250 
251   // process remaining optional args
252 
253   options(iarg,narg,arg);
254   if (wtflag) weight_storage(nullptr);
255 
256   // insure particles are in current box & update box via shrink-wrap
257   // init entire system since comm->setup is done
258   // comm::init needs neighbor::init needs pair::init needs kspace::init, etc
259   // must reset atom map after exchange() since it clears it
260 
261   MPI_Barrier(world);
262   double start_time = MPI_Wtime();
263 
264   lmp->init();
265 
266   if (domain->triclinic) domain->x2lamda(atom->nlocal);
267   domain->pbc();
268   domain->reset_box();
269   comm->setup();
270   comm->exchange();
271   if (atom->map_style != Atom::MAP_NONE) atom->map_set();
272   if (domain->triclinic) domain->lamda2x(atom->nlocal);
273 
274   // imbinit = initial imbalance
275 
276   double maxinit;
277   init_imbalance(0);
278   set_weights();
279   double imbinit = imbalance_factor(maxinit);
280 
281   // no load-balance if imbalance doesn't exceed threshold
282   // unless switching from tiled to non tiled layout, then force rebalance
283 
284   if (comm->layout == Comm::LAYOUT_TILED && style != BISECTION) {
285   } else if (imbinit < thresh) return;
286 
287   // debug output of initial state
288 
289 #ifdef BALANCE_DEBUG
290   if (outflag) dumpout(update->ntimestep);
291 #endif
292 
293   int niter = 0;
294 
295   // perform load-balance
296   // style XYZ = explicit setting of cutting planes of logical 3d grid
297 
298   if (style == XYZ) {
299     if (comm->layout == Comm::LAYOUT_UNIFORM) {
300       if (xflag == USER || yflag == USER || zflag == USER)
301         comm->layout = Comm::LAYOUT_NONUNIFORM;
302     } else if (comm->layout == Comm::LAYOUT_NONUNIFORM) {
303       if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
304         comm->layout = Comm::LAYOUT_UNIFORM;
305     } else if (comm->layout == Comm::LAYOUT_TILED) {
306       if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
307         comm->layout = Comm::LAYOUT_UNIFORM;
308       else comm->layout = Comm::LAYOUT_NONUNIFORM;
309     }
310 
311     if (xflag == UNIFORM) {
312       for (int i = 0; i < procgrid[0]; i++)
313         comm->xsplit[i] = i * 1.0/procgrid[0];
314       comm->xsplit[procgrid[0]] = 1.0;
315     } else if (xflag == USER)
316       for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
317 
318     if (yflag == UNIFORM) {
319       for (int i = 0; i < procgrid[1]; i++)
320         comm->ysplit[i] = i * 1.0/procgrid[1];
321       comm->ysplit[procgrid[1]] = 1.0;
322     } else if (yflag == USER)
323       for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
324 
325     if (zflag == UNIFORM) {
326       for (int i = 0; i < procgrid[2]; i++)
327         comm->zsplit[i] = i * 1.0/procgrid[2];
328       comm->zsplit[procgrid[2]] = 1.0;
329     } else if (zflag == USER)
330       for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
331   }
332 
333   // style SHIFT = adjust cutting planes of logical 3d grid
334 
335   if (style == SHIFT) {
336     comm->layout = Comm::LAYOUT_NONUNIFORM;
337     shift_setup_static(bstr);
338     niter = shift();
339   }
340 
341   // style BISECTION = recursive coordinate bisectioning
342 
343   if (style == BISECTION) {
344     comm->layout = Comm::LAYOUT_TILED;
345     bisection(1);
346   }
347 
348   // reset proc sub-domains
349   // for either brick or tiled comm style
350 
351   if (domain->triclinic) domain->set_lamda_box();
352   domain->set_local_box();
353 
354   // move particles to new processors via irregular()
355   // set disable = 0, so weights migrate with atoms for imbfinal calculation
356 
357   if (domain->triclinic) domain->x2lamda(atom->nlocal);
358   Irregular *irregular = new Irregular(lmp);
359   if (wtflag) fixstore->disable = 0;
360   if (style == BISECTION) irregular->migrate_atoms(1,1,rcb->sendproc);
361   else irregular->migrate_atoms(1);
362   delete irregular;
363   if (domain->triclinic) domain->lamda2x(atom->nlocal);
364 
365   // output of final result
366 
367   if (outflag) dumpout(update->ntimestep);
368 
369   // check if any particles were lost
370 
371   bigint natoms;
372   bigint nblocal = atom->nlocal;
373   MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
374   if (natoms != atom->natoms)
375     error->all(FLERR,"Lost atoms via balance: original {}  current {}",
376                atom->natoms,natoms);
377 
378   // imbfinal = final imbalance
379   // set disable = 1, so weights no longer migrate with atoms
380 
381   double maxfinal;
382   double imbfinal = imbalance_factor(maxfinal);
383   if (wtflag) fixstore->disable = 1;
384 
385   // stats output
386 
387   if (me == 0) {
388     std::string mesg = fmt::format(" rebalancing time: {:.3f} seconds\n",
389                                    MPI_Wtime()-start_time);
390     mesg += fmt::format("  iteration count = {}\n",niter);
391     for (int i = 0; i < nimbalance; ++i) mesg += imbalances[i]->info();
392     mesg += fmt::format("  initial/final maximal load/proc = {:.8} {:.8}\n"
393                         "  initial/final imbalance factor  = {:.8} {:.8}\n",
394                         maxinit,maxfinal,imbinit,imbfinal);
395 
396     if (style != BISECTION) {
397       mesg += "  x cuts:";
398       for (int i = 0; i <= comm->procgrid[0]; i++)
399         mesg += fmt::format(" {:.8}",comm->xsplit[i]);
400       mesg += "\n  y cuts:";
401       for (int i = 0; i <= comm->procgrid[1]; i++)
402         mesg += fmt::format(" {:.8}",comm->ysplit[i]);
403       mesg += "\n  z cuts:";
404       for (int i = 0; i <= comm->procgrid[2]; i++)
405         mesg += fmt::format(" {:.8}",comm->zsplit[i]);
406       mesg += "\n";
407     }
408 
409     utils::logmesg(lmp,mesg);
410   }
411 }
412 
413 /* ----------------------------------------------------------------------
414    process optional command args for Balance and FixBalance
415 ------------------------------------------------------------------------- */
416 
options(int iarg,int narg,char ** arg)417 void Balance::options(int iarg, int narg, char **arg)
418 {
419   // count max number of weight settings
420 
421   nimbalance = 0;
422   for (int i = iarg; i < narg; i++)
423     if (strcmp(arg[i],"weight") == 0) nimbalance++;
424   if (nimbalance) imbalances = new Imbalance*[nimbalance];
425   nimbalance = 0;
426 
427   wtflag = 0;
428   varflag = 0;
429   oldrcb = 0;
430   outflag = 0;
431   int outarg = 0;
432   fp = nullptr;
433 
434   while (iarg < narg) {
435     if (strcmp(arg[iarg],"weight") == 0) {
436       wtflag = 1;
437       Imbalance *imb;
438       int nopt = 0;
439       if (strcmp(arg[iarg+1],"group") == 0) {
440         imb = new ImbalanceGroup(lmp);
441         nopt = imb->options(narg-iarg,arg+iarg+2);
442         imbalances[nimbalance++] = imb;
443       } else if (strcmp(arg[iarg+1],"time") == 0) {
444         imb = new ImbalanceTime(lmp);
445         nopt = imb->options(narg-iarg,arg+iarg+2);
446         imbalances[nimbalance++] = imb;
447       } else if (strcmp(arg[iarg+1],"neigh") == 0) {
448         imb = new ImbalanceNeigh(lmp);
449         nopt = imb->options(narg-iarg,arg+iarg+2);
450         imbalances[nimbalance++] = imb;
451       } else if (strcmp(arg[iarg+1],"var") == 0) {
452         varflag = 1;
453         imb = new ImbalanceVar(lmp);
454         nopt = imb->options(narg-iarg,arg+iarg+2);
455         imbalances[nimbalance++] = imb;
456       } else if (strcmp(arg[iarg+1],"store") == 0) {
457         imb = new ImbalanceStore(lmp);
458         nopt = imb->options(narg-iarg,arg+iarg+2);
459         imbalances[nimbalance++] = imb;
460       } else {
461         error->all(FLERR,"Unknown (fix) balance weight method: {}", arg[iarg+1]);
462       }
463       iarg += 2+nopt;
464 
465     } else if (strcmp(arg[iarg],"old") == 0) {
466       oldrcb = 1;
467       iarg++;
468     } else if (strcmp(arg[iarg],"out") == 0) {
469       if (iarg+2 > narg) error->all(FLERR,"Illegal (fix) balance command");
470       outflag = 1;
471       outarg = iarg+1;
472       iarg += 2;
473     } else error->all(FLERR,"Illegal (fix) balance command");
474   }
475 
476   // output file
477 
478   if (outflag && comm->me == 0) {
479     fp = fopen(arg[outarg],"w");
480     if (fp == nullptr)
481       error->one(FLERR,"Cannot open (fix) balance output file {}: {}",
482                                    arg[outarg], utils::getsyserror());
483   }
484 }
485 
486 /* ----------------------------------------------------------------------
487    allocate per-particle weight storage via FixStore
488    use prefix to distinguish Balance vs FixBalance storage
489    fix could already be allocated if fix balance is re-specified
490 ------------------------------------------------------------------------- */
491 
weight_storage(char * prefix)492 void Balance::weight_storage(char *prefix)
493 {
494   std::string cmd = "";
495 
496   if (prefix) cmd = prefix;
497   cmd += "IMBALANCE_WEIGHTS";
498 
499   int ifix = modify->find_fix(cmd);
500   if (ifix < 1) {
501     cmd += " all STORE peratom 0 1";
502     fixstore = (FixStore *) modify->add_fix(cmd);
503   } else fixstore = (FixStore *) modify->fix[ifix];
504 
505   // do not carry weights with atoms during normal atom migration
506 
507   fixstore->disable = 1;
508 }
509 
510 /* ----------------------------------------------------------------------
511    invoke init() for each Imbalance class
512    flag = 0 for call from Balance, 1 for call from FixBalance
513 ------------------------------------------------------------------------- */
514 
init_imbalance(int flag)515 void Balance::init_imbalance(int flag)
516 {
517   if (!wtflag) return;
518   for (int n = 0; n < nimbalance; n++) imbalances[n]->init(flag);
519 }
520 
521 /* ----------------------------------------------------------------------
522    set weight for each particle
523    via list of Nimbalance classes
524 ------------------------------------------------------------------------- */
525 
set_weights()526 void Balance::set_weights()
527 {
528   if (!wtflag) return;
529   weight = fixstore->vstore;
530 
531   int nlocal = atom->nlocal;
532   for (int i = 0; i < nlocal; i++) weight[i] = 1.0;
533   for (int n = 0; n < nimbalance; n++) imbalances[n]->compute(weight);
534 }
535 
536 /* ----------------------------------------------------------------------
537    calculate imbalance factor based on particle count or particle weights
538    return max = max load per proc
539    return imbalance = max load per proc / ave load per proc
540 ------------------------------------------------------------------------- */
541 
imbalance_factor(double & maxcost)542 double Balance::imbalance_factor(double &maxcost)
543 {
544   double mycost,totalcost;
545 
546   if (wtflag) {
547     weight = fixstore->vstore;
548     int nlocal = atom->nlocal;
549 
550     mycost = 0.0;
551     for (int i = 0; i < nlocal; i++) mycost += weight[i];
552 
553   } else mycost = atom->nlocal;
554 
555   MPI_Allreduce(&mycost,&maxcost,1,MPI_DOUBLE,MPI_MAX,world);
556   MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world);
557 
558   double imbalance = 1.0;
559   if (maxcost > 0.0) imbalance = maxcost / (totalcost/nprocs);
560   return imbalance;
561 }
562 
563 /* ----------------------------------------------------------------------
564    perform balancing via RCB class
565    sortflag = flag for sorting order of received messages by proc ID
566    return list of procs to send my atoms to
567 ------------------------------------------------------------------------- */
568 
bisection(int sortflag)569 int *Balance::bisection(int sortflag)
570 {
571   if (!rcb) rcb = new RCB(lmp);
572 
573   int dim = domain->dimension;
574   int triclinic = domain->triclinic;
575 
576   double *boxlo,*boxhi,*prd;
577 
578   if (triclinic == 0) {
579     boxlo = domain->boxlo;
580     boxhi = domain->boxhi;
581     prd = domain->prd;
582   } else {
583     boxlo = domain->boxlo_lamda;
584     boxhi = domain->boxhi_lamda;
585     prd = domain->prd_lamda;
586   }
587 
588   // shrink-wrap simulation box around atoms for input to RCB
589   // leads to better-shaped sub-boxes when atoms are far from box boundaries
590   // if triclinic, do this in lamda coords
591 
592   double shrink[6],shrinkall[6];
593 
594   shrink[0] = boxhi[0]; shrink[1] = boxhi[1]; shrink[2] = boxhi[2];
595   shrink[3] = boxlo[0]; shrink[4] = boxlo[1]; shrink[5] = boxlo[2];
596 
597   double **x = atom->x;
598   int nlocal = atom->nlocal;
599 
600   if (triclinic) domain->x2lamda(nlocal);
601 
602   for (int i = 0; i < nlocal; i++) {
603     shrink[0] = MIN(shrink[0],x[i][0]);
604     shrink[1] = MIN(shrink[1],x[i][1]);
605     shrink[2] = MIN(shrink[2],x[i][2]);
606     shrink[3] = MAX(shrink[3],x[i][0]);
607     shrink[4] = MAX(shrink[4],x[i][1]);
608     shrink[5] = MAX(shrink[5],x[i][2]);
609   }
610 
611   shrink[3] = -shrink[3]; shrink[4] = -shrink[4]; shrink[5] = -shrink[5];
612   MPI_Allreduce(shrink,shrinkall,6,MPI_DOUBLE,MPI_MIN,world);
613   shrinkall[3] = -shrinkall[3];
614   shrinkall[4] = -shrinkall[4];
615   shrinkall[5] = -shrinkall[5];
616 
617   double *shrinklo = &shrinkall[0];
618   double *shrinkhi = &shrinkall[3];
619 
620   // if shrink size in any dim is zero, use box size in that dim
621 
622   if (shrinklo[0] == shrinkhi[0]) {
623     shrinklo[0] = boxlo[0];
624     shrinkhi[0] = boxhi[0];
625   }
626   if (shrinklo[1] == shrinkhi[1]) {
627     shrinklo[1] = boxlo[1];
628     shrinkhi[1] = boxhi[1];
629   }
630   if (shrinklo[2] == shrinkhi[2]) {
631     shrinklo[2] = boxlo[2];
632     shrinkhi[2] = boxhi[2];
633   }
634 
635   // invoke RCB
636   // then invert() to create list of proc assignments for my atoms
637   // if triclinic, RCB operates on lamda coords
638   // NOTE: (3/2017) can remove undocumented "old" option at some point
639   //       ditto in rcb.cpp, or make it an option
640 
641   if (oldrcb) {
642     if (wtflag) {
643       weight = fixstore->vstore;
644       rcb->compute_old(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
645     } else rcb->compute_old(dim,atom->nlocal,atom->x,nullptr,shrinklo,shrinkhi);
646   } else {
647     if (wtflag) {
648       weight = fixstore->vstore;
649       rcb->compute(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
650     } else rcb->compute(dim,atom->nlocal,atom->x,nullptr,shrinklo,shrinkhi);
651   }
652 
653   if (triclinic) domain->lamda2x(nlocal);
654 
655   rcb->invert(sortflag);
656 
657   // reset RCB lo/hi bounding box to full simulation box as needed
658 
659   double *lo = rcb->lo;
660   double *hi = rcb->hi;
661 
662   if (lo[0] == shrinklo[0]) lo[0] = boxlo[0];
663   if (lo[1] == shrinklo[1]) lo[1] = boxlo[1];
664   if (lo[2] == shrinklo[2]) lo[2] = boxlo[2];
665   if (hi[0] == shrinkhi[0]) hi[0] = boxhi[0];
666   if (hi[1] == shrinkhi[1]) hi[1] = boxhi[1];
667   if (hi[2] == shrinkhi[2]) hi[2] = boxhi[2];
668 
669   // store RCB cut, dim, lo/hi box in CommTiled
670   // cut and lo/hi need to be in fractional form so can
671   // OK if changes by epsilon from what RCB used since atoms
672   //   will subsequently migrate to new owning procs by exchange() anyway
673   // ditto for atoms exactly on lo/hi RCB box boundaries due to ties
674 
675   comm->rcbnew = 1;
676 
677   int idim = rcb->cutdim;
678   if (idim >= 0) comm->rcbcutfrac = (rcb->cut - boxlo[idim]) / prd[idim];
679   else comm->rcbcutfrac = 0.0;
680   comm->rcbcutdim = idim;
681 
682   double (*mysplit)[2] = comm->mysplit;
683 
684   mysplit[0][0] = (lo[0] - boxlo[0]) / prd[0];
685   if (hi[0] == boxhi[0]) mysplit[0][1] = 1.0;
686   else mysplit[0][1] = (hi[0] - boxlo[0]) / prd[0];
687 
688   mysplit[1][0] = (lo[1] - boxlo[1]) / prd[1];
689   if (hi[1] == boxhi[1]) mysplit[1][1] = 1.0;
690   else mysplit[1][1] = (hi[1] - boxlo[1]) / prd[1];
691 
692   mysplit[2][0] = (lo[2] - boxlo[2]) / prd[2];
693   if (hi[2] == boxhi[2]) mysplit[2][1] = 1.0;
694   else mysplit[2][1] = (hi[2] - boxlo[2]) / prd[2];
695 
696   // return list of procs to send my atoms to
697 
698   return rcb->sendproc;
699 }
700 
701 /* ----------------------------------------------------------------------
702    setup static load balance operations
703    called from command and indirectly initially from fix balance
704    set rho = 0 for static balancing
705 ------------------------------------------------------------------------- */
706 
shift_setup_static(char * str)707 void Balance::shift_setup_static(char *str)
708 {
709   shift_allocate = 1;
710 
711   memory->create(proccost,nprocs,"balance:proccost");
712   memory->create(allproccost,nprocs,"balance:allproccost");
713 
714   ndim = strlen(str);
715   bdim = new int[ndim];
716 
717   for (int i = 0; i < ndim; i++) {
718     if (str[i] == 'x') bdim[i] = X;
719     if (str[i] == 'y') bdim[i] = Y;
720     if (str[i] == 'z') bdim[i] = Z;
721   }
722 
723   int max = MAX(comm->procgrid[0],comm->procgrid[1]);
724   max = MAX(max,comm->procgrid[2]);
725 
726   onecost = new double[max];
727   allcost = new double[max];
728   sum = new double[max+1];
729   target = new double[max+1];
730   lo = new double[max+1];
731   hi = new double[max+1];
732   losum = new double[max+1];
733   hisum = new double[max+1];
734 
735   // if current layout is TILED, set initial uniform splits in Comm
736   // this gives starting point to subsequent shift balancing
737 
738   if (comm->layout == Comm::LAYOUT_TILED) {
739     int *procgrid = comm->procgrid;
740     double *xsplit = comm->xsplit;
741     double *ysplit = comm->ysplit;
742     double *zsplit = comm->zsplit;
743 
744     for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
745     for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
746     for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
747     xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
748   }
749 
750   rho = 0;
751 }
752 
753 /* ----------------------------------------------------------------------
754    setup shift load balance operations
755    called from fix balance
756    set rho = 1 to do dynamic balancing after call to shift_setup_static()
757 ------------------------------------------------------------------------- */
758 
shift_setup(char * str,int nitermax_in,double thresh_in)759 void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
760 {
761   shift_setup_static(str);
762   nitermax = nitermax_in;
763   stopthresh = thresh_in;
764   rho = 1;
765 }
766 
767 /* ----------------------------------------------------------------------
768    load balance by changing xyz split proc boundaries in Comm
769    called one time from input script command or many times from fix balance
770    return niter = iteration count
771 ------------------------------------------------------------------------- */
772 
shift()773 int Balance::shift()
774 {
775   int i,j,k,m,np;
776   double mycost,totalcost,boxsize;
777   double *split;
778 
779   // no balancing if no atoms
780 
781   bigint natoms = atom->natoms;
782   if (natoms == 0) return 0;
783 
784   // set delta for 1d balancing = root of threshold
785   // root = # of dimensions being balanced on
786 
787   double delta = pow(stopthresh,1.0/ndim) - 1.0;
788   int *procgrid = comm->procgrid;
789 
790   // all balancing done in lamda coords
791 
792   domain->x2lamda(atom->nlocal);
793 
794   // loop over dimensions in balance string
795 
796   double *prd = domain->prd;
797 
798   int niter = 0;
799   for (int idim = 0; idim < ndim; idim++) {
800 
801     // split = ptr to xyz split in Comm
802 
803     if (bdim[idim] == X) {
804       split = comm->xsplit;
805       boxsize = prd[0];
806     } else if (bdim[idim] == Y) {
807       split = comm->ysplit;
808       boxsize = prd[1];
809     } else if (bdim[idim] == Z) {
810       split = comm->zsplit;
811       boxsize = prd[2];
812     } else continue;
813 
814     // initial count and sum
815 
816     np = procgrid[bdim[idim]];
817     tally(bdim[idim],np,split);
818 
819     // target[i] = desired sum at split I
820 
821     if (wtflag) {
822       weight = fixstore->vstore;
823       int nlocal = atom->nlocal;
824       mycost = 0.0;
825       for (i = 0; i < nlocal; i++) mycost += weight[i];
826     } else mycost = atom->nlocal;
827 
828     MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world);
829 
830     for (i = 0; i < np; i++) target[i] = totalcost/np * i;
831     target[np] = totalcost;
832 
833     // lo[i] = closest split <= split[i] with a sum <= target
834     // hi[i] = closest split >= split[i] with a sum >= target
835 
836     lo[0] = hi[0] = 0.0;
837     lo[np] = hi[np] = 1.0;
838     losum[0] = hisum[0] = 0.0;
839     losum[np] = hisum[np] = totalcost;
840 
841     for (i = 1; i < np; i++) {
842       for (j = i; j >= 0; j--)
843         if (sum[j] <= target[i]) {
844           lo[i] = split[j];
845           losum[i] = sum[j];
846           break;
847         }
848       for (j = i; j <= np; j++)
849         if (sum[j] >= target[i]) {
850           hi[i] = split[j];
851           hisum[i] = sum[j];
852           break;
853         }
854     }
855 
856     // iterate until balanced
857 
858 #ifdef BALANCE_DEBUG
859     if (me == 0) debug_shift_output(idim,0,np,split);
860 #endif
861 
862     int doneflag;
863     int change = 1;
864     for (m = 0; m < nitermax; m++) {
865       change = adjust(np,split);
866       tally(bdim[idim],np,split);
867       niter++;
868 
869 #ifdef BALANCE_DEBUG
870       if (me == 0) debug_shift_output(idim,m+1,np,split);
871       if (outflag) dumpout(update->ntimestep);
872 #endif
873 
874       // stop if no change in splits, b/c all targets are met exactly
875 
876       if (!change) break;
877 
878       // stop if all split sums are within delta of targets
879       // this is a 1d test of particle count per slice
880       // assumption is that this is sufficient accuracy
881       //   for 3d imbalance factor to reach threshold
882 
883       doneflag = 1;
884       for (i = 1; i < np; i++)
885         if (fabs(1.0*(sum[i]-target[i]))/target[i] > delta) doneflag = 0;
886       if (doneflag) break;
887     }
888 
889     // eliminate final adjacent splits that are duplicates
890     // can happen if particle distribution is narrow and Nitermax is small
891     // set lo = midpt between splits
892     // spread duplicates out evenly between bounding midpts with non-duplicates
893     // i,j = lo/hi indices of set of duplicate splits
894     // delta = new spacing between duplicates
895     // bounding midpts = lo[i-1] and lo[j]
896 
897     int duplicate = 0;
898     for (i = 1; i < np-1; i++)
899       if (split[i] == split[i+1]) duplicate = 1;
900     if (duplicate) {
901       for (i = 0; i < np; i++)
902         lo[i] = 0.5 * (split[i] + split[i+1]);
903       i = 1;
904       while (i < np-1) {
905         j = i+1;
906         while (split[j] == split[i]) j++;
907         j--;
908         if (j > i) {
909           delta = (lo[j] - lo[i-1]) / (j-i+2);
910           for (k = i; k <= j; k++)
911             split[k] = lo[i-1] + (k-i+1)*delta;
912         }
913         i = j+1;
914       }
915     }
916 
917     // adjust adjacent splits that are too close (within neigh skin)
918     // do this with minimal adjustment to splits
919 
920     double close = (1.0+EPSNEIGH) * neighbor->skin / boxsize;
921     double midpt,start,stop,lbound,ubound,spacing;
922 
923     i = 0;
924     while (i < np) {
925       if (split[i+1] - split[i] < close) {
926         j = i+1;
927 
928         // I,J = set of consecutive splits that are collectively too close
929         // if can expand set and not become too close to splits I-1 or J+1, do it
930         // else add split I-1 or J+1 to set and try again
931         // delta = size of expanded split set that will satisy criterion
932 
933         while (1) {
934           delta = (j-i) * close;
935           midpt = 0.5 * (split[i]+split[j]);
936           start = midpt - 0.5*delta;
937           stop = midpt + 0.5*delta;
938 
939           if (i > 0) lbound = split[i-1] + close;
940           else lbound = 0.0;
941           if (j < np) ubound = split[j+1] - close;
942           else ubound = 1.0;
943 
944           // start/stop are within bounds, reset the splits
945 
946           if (start >= lbound && stop <= ubound) break;
947 
948           // try a shift to either bound, reset the splits if delta fits
949           // these tests change start/stop
950 
951           if (start < lbound) {
952             start = lbound;
953             stop = start + delta;
954             if (stop <= ubound) break;
955           } else if (stop > ubound) {
956             stop = ubound;
957             start = stop - delta;
958             if (start >= lbound) break;
959           }
960 
961           // delta does not fit between lbound and ubound
962           // exit if can't expand set, else expand set
963           // if can expand in either direction,
964           //   pick new split closest to current midpt of set
965 
966           if (i == 0 && j == np) {
967             start = 0.0; stop = 1.0;
968             break;
969           }
970           if (i == 0) j++;
971           else if (j == np) i--;
972           else if (midpt-lbound < ubound-midpt) i--;
973           else j++;
974         }
975 
976         // reset all splits between I,J inclusive to be equi-spaced
977 
978         spacing = (stop-start) / (j-i);
979         for (m = i; m <= j; m++)
980           split[m] = start + (m-i)*spacing;
981         if (j == np) split[np] = 1.0;
982 
983         // continue testing beyond the J split
984 
985         i = j+1;
986       } else i++;
987     }
988 
989     // sanity check on bad duplicate or inverted splits
990     // zero or negative width sub-domains will break Comm class
991     // should never happen if recursive multisection algorithm is correct
992 
993     int bad = 0;
994     for (i = 0; i < np; i++)
995       if (split[i] >= split[i+1]) bad = 1;
996     if (bad) error->all(FLERR,"Balance produced bad splits");
997 
998     // stop at this point in bstr if imbalance factor < threshold
999     // this is a true 3d test of particle count per processor
1000 
1001     double imbfactor = imbalance_splits();
1002     if (imbfactor <= stopthresh) break;
1003   }
1004 
1005   // restore real coords
1006 
1007   domain->lamda2x(atom->nlocal);
1008 
1009   return niter;
1010 }
1011 
1012 /* ----------------------------------------------------------------------
1013    count atoms in each slice, based on their dim coordinate
1014    N = # of slices
1015    split = N+1 cuts between N slices
1016    return updated count = particles per slice
1017    return updated sum = cumulative count below each of N+1 splits
1018    use binary search to find which slice each atom is in
1019 ------------------------------------------------------------------------- */
1020 
tally(int dim,int n,double * split)1021 void Balance::tally(int dim, int n, double *split)
1022 {
1023   for (int i = 0; i < n; i++) onecost[i] = 0.0;
1024 
1025   double **x = atom->x;
1026   int nlocal = atom->nlocal;
1027   int index;
1028 
1029   if (wtflag) {
1030     weight = fixstore->vstore;
1031     for (int i = 0; i < nlocal; i++) {
1032       index = utils::binary_search(x[i][dim],n,split);
1033       onecost[index] += weight[i];
1034     }
1035   } else {
1036     for (int i = 0; i < nlocal; i++) {
1037       index = utils::binary_search(x[i][dim],n,split);
1038       onecost[index] += 1.0;
1039     }
1040   }
1041 
1042   MPI_Allreduce(onecost,allcost,n,MPI_DOUBLE,MPI_SUM,world);
1043 
1044   sum[0] = 0.0;
1045   for (int i = 1; i < n+1; i++)
1046     sum[i] = sum[i-1] + allcost[i-1];
1047 }
1048 
1049 /* ----------------------------------------------------------------------
1050    adjust cuts between N slices in a dim via recursive multisectioning method
1051    split = current N+1 cuts, with 0.0 and 1.0 at end points
1052    sum = cumulative count up to each split
1053    target = desired cumulative count up to each split
1054    lo/hi = split values that bound current split
1055    update lo/hi to reflect sums at current split values
1056    overwrite split with new cuts
1057      guaranteed that splits will remain in ascending order,
1058      though adjacent values may be identical
1059    recursive bisectioning zooms in on each cut by halving lo/hi
1060    return 0 if no changes in any splits, b/c they are all perfect
1061 ------------------------------------------------------------------------- */
1062 
adjust(int n,double * split)1063 int Balance::adjust(int n, double *split)
1064 {
1065   int i;
1066   double fraction;
1067 
1068   // reset lo/hi based on current sum and splits
1069   // insure lo is monotonically increasing, ties are OK
1070   // insure hi is monotonically decreasing, ties are OK
1071   // this effectively uses info from nearby splits
1072   // to possibly tighten bounds on lo/hi
1073 
1074   for (i = 1; i < n; i++) {
1075     if (sum[i] <= target[i]) {
1076       lo[i] = split[i];
1077       losum[i] = sum[i];
1078     }
1079     if (sum[i] >= target[i]) {
1080       hi[i] = split[i];
1081       hisum[i] = sum[i];
1082     }
1083   }
1084   for (i = 1; i < n; i++)
1085     if (lo[i] < lo[i-1]) {
1086       lo[i] = lo[i-1];
1087       losum[i] = losum[i-1];
1088     }
1089   for (i = n-1; i > 0; i--)
1090     if (hi[i] > hi[i+1]) {
1091       hi[i] = hi[i+1];
1092       hisum[i] = hisum[i+1];
1093     }
1094 
1095   int change = 0;
1096   for (i = 1; i < n; i++)
1097     if (sum[i] != target[i]) {
1098       change = 1;
1099       if (rho == 0) split[i] = 0.5 * (lo[i]+hi[i]);
1100       else {
1101         fraction = 1.0*(target[i]-losum[i]) / (hisum[i]-losum[i]);
1102         split[i] = lo[i] + fraction * (hi[i]-lo[i]);
1103       }
1104     }
1105   return change;
1106 }
1107 
1108 /* ----------------------------------------------------------------------
1109    calculate imbalance based on processor splits in 3 dims
1110    atoms must be in lamda coords (0-1) before called
1111    map particles to 3d grid of procs
1112    return imbalance factor = max load per proc / ave load per proc
1113 ------------------------------------------------------------------------- */
1114 
imbalance_splits()1115 double Balance::imbalance_splits()
1116 {
1117   double *xsplit = comm->xsplit;
1118   double *ysplit = comm->ysplit;
1119   double *zsplit = comm->zsplit;
1120 
1121   int nx = comm->procgrid[0];
1122   int ny = comm->procgrid[1];
1123   int nz = comm->procgrid[2];
1124 
1125   for (int i = 0; i < nprocs; i++) proccost[i] = 0.0;
1126 
1127   double **x = atom->x;
1128   int nlocal = atom->nlocal;
1129   int ix,iy,iz;
1130 
1131   if (wtflag) {
1132     weight = fixstore->vstore;
1133     for (int i = 0; i < nlocal; i++) {
1134       ix = utils::binary_search(x[i][0],nx,xsplit);
1135       iy = utils::binary_search(x[i][1],ny,ysplit);
1136       iz = utils::binary_search(x[i][2],nz,zsplit);
1137       proccost[iz*nx*ny + iy*nx + ix] += weight[i];
1138     }
1139   } else {
1140     for (int i = 0; i < nlocal; i++) {
1141       ix = utils::binary_search(x[i][0],nx,xsplit);
1142       iy = utils::binary_search(x[i][1],ny,ysplit);
1143       iz = utils::binary_search(x[i][2],nz,zsplit);
1144       proccost[iz*nx*ny + iy*nx + ix] += 1.0;
1145     }
1146   }
1147 
1148   // one proc's particles may map to many partitions, so must Allreduce
1149 
1150   MPI_Allreduce(proccost,allproccost,nprocs,MPI_DOUBLE,MPI_SUM,world);
1151 
1152   double maxcost = 0.0;
1153   double totalcost = 0.0;
1154   for (int i = 0; i < nprocs; i++) {
1155     maxcost = MAX(maxcost,allproccost[i]);
1156     totalcost += allproccost[i];
1157   }
1158 
1159   double imbalance = 1.0;
1160   if (maxcost > 0.0) imbalance = maxcost / (totalcost/nprocs);
1161   return imbalance;
1162 }
1163 
1164 /* ----------------------------------------------------------------------
1165    write dump snapshot of line segments in Pizza.py mdump mesh format
1166    write xy lines around each proc's sub-domain for 2d
1167    write xyz cubes around each proc's sub-domain for 3d
1168    only called by proc 0
1169    NOTE: only implemented for orthogonal boxes, not triclinic
1170 ------------------------------------------------------------------------- */
1171 
dumpout(bigint tstep)1172 void Balance::dumpout(bigint tstep)
1173 {
1174   int dimension = domain->dimension;
1175   int triclinic = domain->triclinic;
1176 
1177   // Allgather each proc's sub-box
1178   // could use Gather, but that requires MPI to alloc memory
1179 
1180   double *lo,*hi;
1181   if (triclinic == 0) {
1182     lo = domain->sublo;
1183     hi = domain->subhi;
1184   } else {
1185     lo = domain->sublo_lamda;
1186     hi = domain->subhi_lamda;
1187   }
1188 
1189   double box[6];
1190   box[0] = lo[0]; box[1] = lo[1]; box[2] = lo[2];
1191   box[3] = hi[0]; box[4] = hi[1]; box[5] = hi[2];
1192 
1193   double **boxall;
1194   memory->create(boxall,nprocs,6,"balance:dumpout");
1195   MPI_Allgather(box,6,MPI_DOUBLE,&boxall[0][0],6,MPI_DOUBLE,world);
1196 
1197   if (me) {
1198     memory->destroy(boxall);
1199     return;
1200   }
1201 
1202   // proc 0 writes out nodal coords
1203   // some will be duplicates
1204 
1205   double *boxlo = domain->boxlo;
1206   double *boxhi = domain->boxhi;
1207 
1208   fmt::print(fp,"ITEM: TIMESTEP\n{}\n",tstep);
1209   fprintf(fp,"ITEM: NUMBER OF NODES\n");
1210   if (dimension == 2) fprintf(fp,"%d\n",4*nprocs);
1211   else fprintf(fp,"%d\n",8*nprocs);
1212   fprintf(fp,"ITEM: BOX BOUNDS\n");
1213   fprintf(fp,"%g %g\n",boxlo[0],boxhi[0]);
1214   fprintf(fp,"%g %g\n",boxlo[1],boxhi[1]);
1215   fprintf(fp,"%g %g\n",boxlo[2],boxhi[2]);
1216   fprintf(fp,"ITEM: NODES\n");
1217 
1218   if (triclinic == 0) {
1219     if (dimension == 2) {
1220       int m = 0;
1221       for (int i = 0; i < nprocs; i++) {
1222         fprintf(fp,"%d %d %g %g %g\n",m+1,1,boxall[i][0],boxall[i][1],0.0);
1223         fprintf(fp,"%d %d %g %g %g\n",m+2,1,boxall[i][3],boxall[i][1],0.0);
1224         fprintf(fp,"%d %d %g %g %g\n",m+3,1,boxall[i][3],boxall[i][4],0.0);
1225         fprintf(fp,"%d %d %g %g %g\n",m+4,1,boxall[i][0],boxall[i][4],0.0);
1226         m += 4;
1227       }
1228     } else {
1229       int m = 0;
1230       for (int i = 0; i < nprocs; i++) {
1231         fprintf(fp,"%d %d %g %g %g\n",m+1,1,
1232                 boxall[i][0],boxall[i][1],boxall[i][2]);
1233         fprintf(fp,"%d %d %g %g %g\n",m+2,1,
1234                 boxall[i][3],boxall[i][1],boxall[i][2]);
1235         fprintf(fp,"%d %d %g %g %g\n",m+3,1,
1236                 boxall[i][3],boxall[i][4],boxall[i][2]);
1237         fprintf(fp,"%d %d %g %g %g\n",m+4,1,
1238                 boxall[i][0],boxall[i][4],boxall[i][2]);
1239         fprintf(fp,"%d %d %g %g %g\n",m+5,1,
1240                 boxall[i][0],boxall[i][1],boxall[i][5]);
1241         fprintf(fp,"%d %d %g %g %g\n",m+6,1,
1242                 boxall[i][3],boxall[i][1],boxall[i][5]);
1243         fprintf(fp,"%d %d %g %g %g\n",m+7,1,
1244                 boxall[i][3],boxall[i][4],boxall[i][5]);
1245         fprintf(fp,"%d %d %g %g %g\n",m+8,1,
1246                 boxall[i][0],boxall[i][4],boxall[i][5]);
1247         m += 8;
1248       }
1249     }
1250 
1251   } else {
1252     double (*bc)[3] = domain->corners;
1253 
1254     if (dimension == 2) {
1255       int m = 0;
1256       for (int i = 0; i < nprocs; i++) {
1257         domain->lamda_box_corners(&boxall[i][0],&boxall[i][3]);
1258         fprintf(fp,"%d %d %g %g %g\n",m+1,1,bc[0][0],bc[0][1],0.0);
1259         fprintf(fp,"%d %d %g %g %g\n",m+2,1,bc[1][0],bc[1][1],0.0);
1260         fprintf(fp,"%d %d %g %g %g\n",m+3,1,bc[2][0],bc[2][1],0.0);
1261         fprintf(fp,"%d %d %g %g %g\n",m+4,1,bc[3][0],bc[3][1],0.0);
1262         m += 4;
1263       }
1264     } else {
1265       int m = 0;
1266       for (int i = 0; i < nprocs; i++) {
1267         domain->lamda_box_corners(&boxall[i][0],&boxall[i][3]);
1268         fprintf(fp,"%d %d %g %g %g\n",m+1,1,bc[0][0],bc[0][1],bc[0][2]);
1269         fprintf(fp,"%d %d %g %g %g\n",m+2,1,bc[1][0],bc[1][1],bc[1][2]);
1270         fprintf(fp,"%d %d %g %g %g\n",m+3,1,bc[2][0],bc[2][1],bc[2][2]);
1271         fprintf(fp,"%d %d %g %g %g\n",m+4,1,bc[3][0],bc[3][1],bc[3][2]);
1272         fprintf(fp,"%d %d %g %g %g\n",m+5,1,bc[4][0],bc[4][1],bc[4][2]);
1273         fprintf(fp,"%d %d %g %g %g\n",m+6,1,bc[5][0],bc[5][1],bc[5][2]);
1274         fprintf(fp,"%d %d %g %g %g\n",m+7,1,bc[6][0],bc[6][1],bc[6][2]);
1275         fprintf(fp,"%d %d %g %g %g\n",m+8,1,bc[7][0],bc[7][1],bc[7][2]);
1276         m += 8;
1277       }
1278     }
1279   }
1280 
1281   // write out one square/cube per processor for 2d/3d
1282 
1283   fmt::print(fp,"ITEM: TIMESTEP\n{}\n",tstep);
1284   if (dimension == 2) fprintf(fp,"ITEM: NUMBER OF SQUARES\n");
1285   else fprintf(fp,"ITEM: NUMBER OF CUBES\n");
1286   fprintf(fp,"%d\n",nprocs);
1287   if (dimension == 2) fprintf(fp,"ITEM: SQUARES\n");
1288   else fprintf(fp,"ITEM: CUBES\n");
1289 
1290   if (dimension == 2) {
1291     int m = 0;
1292     for (int i = 0; i < nprocs; i++) {
1293       fprintf(fp,"%d %d %d %d %d %d\n",i+1,1,m+1,m+2,m+3,m+4);
1294       m += 4;
1295     }
1296   } else {
1297     int m = 0;
1298     for (int i = 0; i < nprocs; i++) {
1299       fprintf(fp,"%d %d %d %d %d %d %d %d %d %d\n",
1300               i+1,1,m+1,m+2,m+3,m+4,m+5,m+6,m+7,m+8);
1301       m += 8;
1302     }
1303   }
1304 
1305   memory->destroy(boxall);
1306 }
1307 
1308 /* ----------------------------------------------------------------------
1309    debug output for Idim and count
1310    only called by proc 0
1311 ------------------------------------------------------------------------- */
1312 
1313 #ifdef BALANCE_DEBUG
debug_shift_output(int idim,int m,int np,double * split)1314 void Balance::debug_shift_output(int idim, int m, int np, double *split)
1315 {
1316   int i;
1317   const char *dim = nullptr;
1318 
1319   double *boxlo = domain->boxlo;
1320   double *prd = domain->prd;
1321 
1322   if (bdim[idim] == X) dim = "X";
1323   else if (bdim[idim] == Y) dim = "Y";
1324   else if (bdim[idim] == Z) dim = "Z";
1325   fprintf(stderr,"Dimension %s, Iteration %d\n",dim,m);
1326 
1327   fprintf(stderr,"  Count:");
1328   for (i = 0; i <= np; i++) fmt::print(stderr," {}",count[i]);
1329   fprintf(stderr,"\n");
1330   fprintf(stderr,"  Sum:");
1331   for (i = 0; i <= np; i++) fmt::print(stderr," {}",sum[i]);
1332   fprintf(stderr,"\n");
1333   fprintf(stderr,"  Target:");
1334   for (i = 0; i <= np; i++) fmt::print(stderr," {}",target[i]);
1335   fprintf(stderr,"\n");
1336   fprintf(stderr,"  Actual cut:");
1337   for (i = 0; i <= np; i++)
1338     fprintf(stderr," %g",boxlo[bdim[idim]] + split[i]*prd[bdim[idim]]);
1339   fprintf(stderr,"\n");
1340   fprintf(stderr,"  Split:");
1341   for (i = 0; i <= np; i++) fprintf(stderr," %g",split[i]);
1342   fprintf(stderr,"\n");
1343   fprintf(stderr,"  Low:");
1344   for (i = 0; i <= np; i++) fprintf(stderr," %g",lo[i]);
1345   fprintf(stderr,"\n");
1346   fprintf(stderr,"  Low-sum:");
1347   for (i = 0; i <= np; i++) fmt::print(stderr," {}",losum[i]);
1348   fprintf(stderr,"\n");
1349   fprintf(stderr,"  Hi:");
1350   for (i = 0; i <= np; i++) fprintf(stderr," %g",hi[i]);
1351   fprintf(stderr,"\n");
1352   fprintf(stderr,"  Hi-sum:");
1353   for (i = 0; i <= np; i++) fmt::print(stderr," {}",hisum[i]);
1354   fprintf(stderr,"\n");
1355   fprintf(stderr,"  Delta:");
1356   for (i = 0; i < np; i++) fprintf(stderr," %g",split[i+1]-split[i]);
1357   fprintf(stderr,"\n");
1358 }
1359 #endif
1360