1 // clang-format off
2 /* ----------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 Copyright (2003) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14
15 /* ----------------------------------------------------------------------
16 Contributing authors, for weighted balancing:
17 Axel Kohlmeyer (Temple U), Iain Bethune (EPCC)
18 ------------------------------------------------------------------------- */
19
20 // #define BALANCE_DEBUG 1
21
22 #include "balance.h"
23
24 #include "update.h"
25 #include "atom.h"
26 #include "neighbor.h"
27 #include "comm.h"
28 #include "domain.h"
29 #include "fix_store.h"
30 #include "imbalance.h"
31 #include "imbalance_group.h"
32 #include "imbalance_neigh.h"
33 #include "imbalance_store.h"
34 #include "imbalance_time.h"
35 #include "imbalance_var.h"
36 #include "irregular.h"
37 #include "memory.h"
38 #include "modify.h"
39 #include "rcb.h"
40 #include "error.h"
41
42 #include <cmath>
43 #include <cstring>
44
45 using namespace LAMMPS_NS;
46
47 double EPSNEIGH = 1.0e-3;
48
49 enum{XYZ,SHIFT,BISECTION};
50 enum{NONE,UNIFORM,USER};
51 enum{X,Y,Z};
52
53 /* ---------------------------------------------------------------------- */
54
Balance(LAMMPS * lmp)55 Balance::Balance(LAMMPS *lmp) : Command(lmp)
56 {
57 MPI_Comm_rank(world,&me);
58 MPI_Comm_size(world,&nprocs);
59
60 user_xsplit = user_ysplit = user_zsplit = nullptr;
61 shift_allocate = 0;
62 proccost = allproccost = nullptr;
63
64 rcb = nullptr;
65
66 nimbalance = 0;
67 imbalances = nullptr;
68 fixstore = nullptr;
69
70 fp = nullptr;
71 firststep = 1;
72 }
73
74 /* ---------------------------------------------------------------------- */
75
~Balance()76 Balance::~Balance()
77 {
78 memory->destroy(proccost);
79 memory->destroy(allproccost);
80
81 delete [] user_xsplit;
82 delete [] user_ysplit;
83 delete [] user_zsplit;
84
85 if (shift_allocate) {
86 delete [] bdim;
87 delete [] onecost;
88 delete [] allcost;
89 delete [] sum;
90 delete [] target;
91 delete [] lo;
92 delete [] hi;
93 delete [] losum;
94 delete [] hisum;
95 }
96
97 delete rcb;
98
99 for (int i = 0; i < nimbalance; i++) delete imbalances[i];
100 delete [] imbalances;
101
102 // check nfix in case all fixes have already been deleted
103
104 if (fixstore && modify->nfix) modify->delete_fix(fixstore->id);
105 fixstore = nullptr;
106
107 if (fp) fclose(fp);
108 }
109
110 /* ----------------------------------------------------------------------
111 called as balance command in input script
112 ------------------------------------------------------------------------- */
113
command(int narg,char ** arg)114 void Balance::command(int narg, char **arg)
115 {
116 if (domain->box_exist == 0)
117 error->all(FLERR,"Balance command before simulation box is defined");
118
119 if (me == 0) utils::logmesg(lmp,"Balancing ...\n");
120
121 // parse required arguments
122
123 if (narg < 2) error->all(FLERR,"Illegal balance command");
124
125 thresh = utils::numeric(FLERR,arg[0],false,lmp);
126
127 int dimension = domain->dimension;
128 int *procgrid = comm->procgrid;
129 style = -1;
130 xflag = yflag = zflag = NONE;
131
132 int iarg = 1;
133 while (iarg < narg) {
134 if (strcmp(arg[iarg],"x") == 0) {
135 if (style != -1 && style != XYZ)
136 error->all(FLERR,"Illegal balance command");
137 style = XYZ;
138 if (strcmp(arg[iarg+1],"uniform") == 0) {
139 if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
140 xflag = UNIFORM;
141 iarg += 2;
142 } else {
143 if (1 + procgrid[0]-1 > narg)
144 error->all(FLERR,"Illegal balance command");
145 xflag = USER;
146 delete [] user_xsplit;
147 user_xsplit = new double[procgrid[0]+1];
148 user_xsplit[0] = 0.0;
149 iarg++;
150 for (int i = 1; i < procgrid[0]; i++)
151 user_xsplit[i] = utils::numeric(FLERR,arg[iarg++],false,lmp);
152 user_xsplit[procgrid[0]] = 1.0;
153 }
154 } else if (strcmp(arg[iarg],"y") == 0) {
155 if (style != -1 && style != XYZ)
156 error->all(FLERR,"Illegal balance command");
157 style = XYZ;
158 if (strcmp(arg[iarg+1],"uniform") == 0) {
159 if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
160 yflag = UNIFORM;
161 iarg += 2;
162 } else {
163 if (1 + procgrid[1]-1 > narg)
164 error->all(FLERR,"Illegal balance command");
165 yflag = USER;
166 delete [] user_ysplit;
167 user_ysplit = new double[procgrid[1]+1];
168 user_ysplit[0] = 0.0;
169 iarg++;
170 for (int i = 1; i < procgrid[1]; i++)
171 user_ysplit[i] = utils::numeric(FLERR,arg[iarg++],false,lmp);
172 user_ysplit[procgrid[1]] = 1.0;
173 }
174 } else if (strcmp(arg[iarg],"z") == 0) {
175 if (style != -1 && style != XYZ)
176 error->all(FLERR,"Illegal balance command");
177 style = XYZ;
178 if (strcmp(arg[iarg+1],"uniform") == 0) {
179 if (iarg+2 > narg) error->all(FLERR,"Illegal balance command");
180 zflag = UNIFORM;
181 iarg += 2;
182 } else {
183 if (1 + procgrid[2]-1 > narg)
184 error->all(FLERR,"Illegal balance command");
185 zflag = USER;
186 delete [] user_zsplit;
187 user_zsplit = new double[procgrid[2]+1];
188 user_zsplit[0] = 0.0;
189 iarg++;
190 for (int i = 1; i < procgrid[2]; i++)
191 user_zsplit[i] = utils::numeric(FLERR,arg[iarg++],false,lmp);
192 user_zsplit[procgrid[2]] = 1.0;
193 }
194
195 } else if (strcmp(arg[iarg],"shift") == 0) {
196 if (style != -1) error->all(FLERR,"Illegal balance command");
197 if (iarg+4 > narg) error->all(FLERR,"Illegal balance command");
198 style = SHIFT;
199 if (strlen(arg[iarg+1]) > BSTR_SIZE) error->all(FLERR,"Illegal balance command");
200 strncpy(bstr,arg[iarg+1],BSTR_SIZE+1);
201 nitermax = utils::inumeric(FLERR,arg[iarg+2],false,lmp);
202 if (nitermax <= 0) error->all(FLERR,"Illegal balance command");
203 stopthresh = utils::numeric(FLERR,arg[iarg+3],false,lmp);
204 if (stopthresh < 1.0) error->all(FLERR,"Illegal balance command");
205 iarg += 4;
206
207 } else if (strcmp(arg[iarg],"rcb") == 0) {
208 if (style != -1) error->all(FLERR,"Illegal balance command");
209 style = BISECTION;
210 iarg++;
211
212 } else break;
213 }
214
215 // error checks
216
217 if (style == XYZ) {
218 if (zflag != NONE && dimension == 2)
219 error->all(FLERR,"Cannot balance in z dimension for 2d simulation");
220
221 if (xflag == USER)
222 for (int i = 1; i <= procgrid[0]; i++)
223 if (user_xsplit[i-1] >= user_xsplit[i])
224 error->all(FLERR,"Illegal balance command");
225 if (yflag == USER)
226 for (int i = 1; i <= procgrid[1]; i++)
227 if (user_ysplit[i-1] >= user_ysplit[i])
228 error->all(FLERR,"Illegal balance command");
229 if (zflag == USER)
230 for (int i = 1; i <= procgrid[2]; i++)
231 if (user_zsplit[i-1] >= user_zsplit[i])
232 error->all(FLERR,"Illegal balance command");
233 }
234
235 if (style == SHIFT) {
236 const int blen=strlen(bstr);
237 for (int i = 0; i < blen; i++) {
238 if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
239 error->all(FLERR,"Balance shift string is invalid");
240 if (bstr[i] == 'z' && dimension == 2)
241 error->all(FLERR,"Balance shift string is invalid");
242 for (int j = i+1; j < blen; j++)
243 if (bstr[i] == bstr[j])
244 error->all(FLERR,"Balance shift string is invalid");
245 }
246 }
247
248 if (style == BISECTION && comm->style == 0)
249 error->all(FLERR,"Balance rcb cannot be used with comm_style brick");
250
251 // process remaining optional args
252
253 options(iarg,narg,arg);
254 if (wtflag) weight_storage(nullptr);
255
256 // insure particles are in current box & update box via shrink-wrap
257 // init entire system since comm->setup is done
258 // comm::init needs neighbor::init needs pair::init needs kspace::init, etc
259 // must reset atom map after exchange() since it clears it
260
261 MPI_Barrier(world);
262 double start_time = MPI_Wtime();
263
264 lmp->init();
265
266 if (domain->triclinic) domain->x2lamda(atom->nlocal);
267 domain->pbc();
268 domain->reset_box();
269 comm->setup();
270 comm->exchange();
271 if (atom->map_style != Atom::MAP_NONE) atom->map_set();
272 if (domain->triclinic) domain->lamda2x(atom->nlocal);
273
274 // imbinit = initial imbalance
275
276 double maxinit;
277 init_imbalance(0);
278 set_weights();
279 double imbinit = imbalance_factor(maxinit);
280
281 // no load-balance if imbalance doesn't exceed threshold
282 // unless switching from tiled to non tiled layout, then force rebalance
283
284 if (comm->layout == Comm::LAYOUT_TILED && style != BISECTION) {
285 } else if (imbinit < thresh) return;
286
287 // debug output of initial state
288
289 #ifdef BALANCE_DEBUG
290 if (outflag) dumpout(update->ntimestep);
291 #endif
292
293 int niter = 0;
294
295 // perform load-balance
296 // style XYZ = explicit setting of cutting planes of logical 3d grid
297
298 if (style == XYZ) {
299 if (comm->layout == Comm::LAYOUT_UNIFORM) {
300 if (xflag == USER || yflag == USER || zflag == USER)
301 comm->layout = Comm::LAYOUT_NONUNIFORM;
302 } else if (comm->layout == Comm::LAYOUT_NONUNIFORM) {
303 if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
304 comm->layout = Comm::LAYOUT_UNIFORM;
305 } else if (comm->layout == Comm::LAYOUT_TILED) {
306 if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
307 comm->layout = Comm::LAYOUT_UNIFORM;
308 else comm->layout = Comm::LAYOUT_NONUNIFORM;
309 }
310
311 if (xflag == UNIFORM) {
312 for (int i = 0; i < procgrid[0]; i++)
313 comm->xsplit[i] = i * 1.0/procgrid[0];
314 comm->xsplit[procgrid[0]] = 1.0;
315 } else if (xflag == USER)
316 for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
317
318 if (yflag == UNIFORM) {
319 for (int i = 0; i < procgrid[1]; i++)
320 comm->ysplit[i] = i * 1.0/procgrid[1];
321 comm->ysplit[procgrid[1]] = 1.0;
322 } else if (yflag == USER)
323 for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
324
325 if (zflag == UNIFORM) {
326 for (int i = 0; i < procgrid[2]; i++)
327 comm->zsplit[i] = i * 1.0/procgrid[2];
328 comm->zsplit[procgrid[2]] = 1.0;
329 } else if (zflag == USER)
330 for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
331 }
332
333 // style SHIFT = adjust cutting planes of logical 3d grid
334
335 if (style == SHIFT) {
336 comm->layout = Comm::LAYOUT_NONUNIFORM;
337 shift_setup_static(bstr);
338 niter = shift();
339 }
340
341 // style BISECTION = recursive coordinate bisectioning
342
343 if (style == BISECTION) {
344 comm->layout = Comm::LAYOUT_TILED;
345 bisection(1);
346 }
347
348 // reset proc sub-domains
349 // for either brick or tiled comm style
350
351 if (domain->triclinic) domain->set_lamda_box();
352 domain->set_local_box();
353
354 // move particles to new processors via irregular()
355 // set disable = 0, so weights migrate with atoms for imbfinal calculation
356
357 if (domain->triclinic) domain->x2lamda(atom->nlocal);
358 Irregular *irregular = new Irregular(lmp);
359 if (wtflag) fixstore->disable = 0;
360 if (style == BISECTION) irregular->migrate_atoms(1,1,rcb->sendproc);
361 else irregular->migrate_atoms(1);
362 delete irregular;
363 if (domain->triclinic) domain->lamda2x(atom->nlocal);
364
365 // output of final result
366
367 if (outflag) dumpout(update->ntimestep);
368
369 // check if any particles were lost
370
371 bigint natoms;
372 bigint nblocal = atom->nlocal;
373 MPI_Allreduce(&nblocal,&natoms,1,MPI_LMP_BIGINT,MPI_SUM,world);
374 if (natoms != atom->natoms)
375 error->all(FLERR,"Lost atoms via balance: original {} current {}",
376 atom->natoms,natoms);
377
378 // imbfinal = final imbalance
379 // set disable = 1, so weights no longer migrate with atoms
380
381 double maxfinal;
382 double imbfinal = imbalance_factor(maxfinal);
383 if (wtflag) fixstore->disable = 1;
384
385 // stats output
386
387 if (me == 0) {
388 std::string mesg = fmt::format(" rebalancing time: {:.3f} seconds\n",
389 MPI_Wtime()-start_time);
390 mesg += fmt::format(" iteration count = {}\n",niter);
391 for (int i = 0; i < nimbalance; ++i) mesg += imbalances[i]->info();
392 mesg += fmt::format(" initial/final maximal load/proc = {:.8} {:.8}\n"
393 " initial/final imbalance factor = {:.8} {:.8}\n",
394 maxinit,maxfinal,imbinit,imbfinal);
395
396 if (style != BISECTION) {
397 mesg += " x cuts:";
398 for (int i = 0; i <= comm->procgrid[0]; i++)
399 mesg += fmt::format(" {:.8}",comm->xsplit[i]);
400 mesg += "\n y cuts:";
401 for (int i = 0; i <= comm->procgrid[1]; i++)
402 mesg += fmt::format(" {:.8}",comm->ysplit[i]);
403 mesg += "\n z cuts:";
404 for (int i = 0; i <= comm->procgrid[2]; i++)
405 mesg += fmt::format(" {:.8}",comm->zsplit[i]);
406 mesg += "\n";
407 }
408
409 utils::logmesg(lmp,mesg);
410 }
411 }
412
413 /* ----------------------------------------------------------------------
414 process optional command args for Balance and FixBalance
415 ------------------------------------------------------------------------- */
416
options(int iarg,int narg,char ** arg)417 void Balance::options(int iarg, int narg, char **arg)
418 {
419 // count max number of weight settings
420
421 nimbalance = 0;
422 for (int i = iarg; i < narg; i++)
423 if (strcmp(arg[i],"weight") == 0) nimbalance++;
424 if (nimbalance) imbalances = new Imbalance*[nimbalance];
425 nimbalance = 0;
426
427 wtflag = 0;
428 varflag = 0;
429 oldrcb = 0;
430 outflag = 0;
431 int outarg = 0;
432 fp = nullptr;
433
434 while (iarg < narg) {
435 if (strcmp(arg[iarg],"weight") == 0) {
436 wtflag = 1;
437 Imbalance *imb;
438 int nopt = 0;
439 if (strcmp(arg[iarg+1],"group") == 0) {
440 imb = new ImbalanceGroup(lmp);
441 nopt = imb->options(narg-iarg,arg+iarg+2);
442 imbalances[nimbalance++] = imb;
443 } else if (strcmp(arg[iarg+1],"time") == 0) {
444 imb = new ImbalanceTime(lmp);
445 nopt = imb->options(narg-iarg,arg+iarg+2);
446 imbalances[nimbalance++] = imb;
447 } else if (strcmp(arg[iarg+1],"neigh") == 0) {
448 imb = new ImbalanceNeigh(lmp);
449 nopt = imb->options(narg-iarg,arg+iarg+2);
450 imbalances[nimbalance++] = imb;
451 } else if (strcmp(arg[iarg+1],"var") == 0) {
452 varflag = 1;
453 imb = new ImbalanceVar(lmp);
454 nopt = imb->options(narg-iarg,arg+iarg+2);
455 imbalances[nimbalance++] = imb;
456 } else if (strcmp(arg[iarg+1],"store") == 0) {
457 imb = new ImbalanceStore(lmp);
458 nopt = imb->options(narg-iarg,arg+iarg+2);
459 imbalances[nimbalance++] = imb;
460 } else {
461 error->all(FLERR,"Unknown (fix) balance weight method: {}", arg[iarg+1]);
462 }
463 iarg += 2+nopt;
464
465 } else if (strcmp(arg[iarg],"old") == 0) {
466 oldrcb = 1;
467 iarg++;
468 } else if (strcmp(arg[iarg],"out") == 0) {
469 if (iarg+2 > narg) error->all(FLERR,"Illegal (fix) balance command");
470 outflag = 1;
471 outarg = iarg+1;
472 iarg += 2;
473 } else error->all(FLERR,"Illegal (fix) balance command");
474 }
475
476 // output file
477
478 if (outflag && comm->me == 0) {
479 fp = fopen(arg[outarg],"w");
480 if (fp == nullptr)
481 error->one(FLERR,"Cannot open (fix) balance output file {}: {}",
482 arg[outarg], utils::getsyserror());
483 }
484 }
485
486 /* ----------------------------------------------------------------------
487 allocate per-particle weight storage via FixStore
488 use prefix to distinguish Balance vs FixBalance storage
489 fix could already be allocated if fix balance is re-specified
490 ------------------------------------------------------------------------- */
491
weight_storage(char * prefix)492 void Balance::weight_storage(char *prefix)
493 {
494 std::string cmd = "";
495
496 if (prefix) cmd = prefix;
497 cmd += "IMBALANCE_WEIGHTS";
498
499 int ifix = modify->find_fix(cmd);
500 if (ifix < 1) {
501 cmd += " all STORE peratom 0 1";
502 fixstore = (FixStore *) modify->add_fix(cmd);
503 } else fixstore = (FixStore *) modify->fix[ifix];
504
505 // do not carry weights with atoms during normal atom migration
506
507 fixstore->disable = 1;
508 }
509
510 /* ----------------------------------------------------------------------
511 invoke init() for each Imbalance class
512 flag = 0 for call from Balance, 1 for call from FixBalance
513 ------------------------------------------------------------------------- */
514
init_imbalance(int flag)515 void Balance::init_imbalance(int flag)
516 {
517 if (!wtflag) return;
518 for (int n = 0; n < nimbalance; n++) imbalances[n]->init(flag);
519 }
520
521 /* ----------------------------------------------------------------------
522 set weight for each particle
523 via list of Nimbalance classes
524 ------------------------------------------------------------------------- */
525
set_weights()526 void Balance::set_weights()
527 {
528 if (!wtflag) return;
529 weight = fixstore->vstore;
530
531 int nlocal = atom->nlocal;
532 for (int i = 0; i < nlocal; i++) weight[i] = 1.0;
533 for (int n = 0; n < nimbalance; n++) imbalances[n]->compute(weight);
534 }
535
536 /* ----------------------------------------------------------------------
537 calculate imbalance factor based on particle count or particle weights
538 return max = max load per proc
539 return imbalance = max load per proc / ave load per proc
540 ------------------------------------------------------------------------- */
541
imbalance_factor(double & maxcost)542 double Balance::imbalance_factor(double &maxcost)
543 {
544 double mycost,totalcost;
545
546 if (wtflag) {
547 weight = fixstore->vstore;
548 int nlocal = atom->nlocal;
549
550 mycost = 0.0;
551 for (int i = 0; i < nlocal; i++) mycost += weight[i];
552
553 } else mycost = atom->nlocal;
554
555 MPI_Allreduce(&mycost,&maxcost,1,MPI_DOUBLE,MPI_MAX,world);
556 MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world);
557
558 double imbalance = 1.0;
559 if (maxcost > 0.0) imbalance = maxcost / (totalcost/nprocs);
560 return imbalance;
561 }
562
563 /* ----------------------------------------------------------------------
564 perform balancing via RCB class
565 sortflag = flag for sorting order of received messages by proc ID
566 return list of procs to send my atoms to
567 ------------------------------------------------------------------------- */
568
bisection(int sortflag)569 int *Balance::bisection(int sortflag)
570 {
571 if (!rcb) rcb = new RCB(lmp);
572
573 int dim = domain->dimension;
574 int triclinic = domain->triclinic;
575
576 double *boxlo,*boxhi,*prd;
577
578 if (triclinic == 0) {
579 boxlo = domain->boxlo;
580 boxhi = domain->boxhi;
581 prd = domain->prd;
582 } else {
583 boxlo = domain->boxlo_lamda;
584 boxhi = domain->boxhi_lamda;
585 prd = domain->prd_lamda;
586 }
587
588 // shrink-wrap simulation box around atoms for input to RCB
589 // leads to better-shaped sub-boxes when atoms are far from box boundaries
590 // if triclinic, do this in lamda coords
591
592 double shrink[6],shrinkall[6];
593
594 shrink[0] = boxhi[0]; shrink[1] = boxhi[1]; shrink[2] = boxhi[2];
595 shrink[3] = boxlo[0]; shrink[4] = boxlo[1]; shrink[5] = boxlo[2];
596
597 double **x = atom->x;
598 int nlocal = atom->nlocal;
599
600 if (triclinic) domain->x2lamda(nlocal);
601
602 for (int i = 0; i < nlocal; i++) {
603 shrink[0] = MIN(shrink[0],x[i][0]);
604 shrink[1] = MIN(shrink[1],x[i][1]);
605 shrink[2] = MIN(shrink[2],x[i][2]);
606 shrink[3] = MAX(shrink[3],x[i][0]);
607 shrink[4] = MAX(shrink[4],x[i][1]);
608 shrink[5] = MAX(shrink[5],x[i][2]);
609 }
610
611 shrink[3] = -shrink[3]; shrink[4] = -shrink[4]; shrink[5] = -shrink[5];
612 MPI_Allreduce(shrink,shrinkall,6,MPI_DOUBLE,MPI_MIN,world);
613 shrinkall[3] = -shrinkall[3];
614 shrinkall[4] = -shrinkall[4];
615 shrinkall[5] = -shrinkall[5];
616
617 double *shrinklo = &shrinkall[0];
618 double *shrinkhi = &shrinkall[3];
619
620 // if shrink size in any dim is zero, use box size in that dim
621
622 if (shrinklo[0] == shrinkhi[0]) {
623 shrinklo[0] = boxlo[0];
624 shrinkhi[0] = boxhi[0];
625 }
626 if (shrinklo[1] == shrinkhi[1]) {
627 shrinklo[1] = boxlo[1];
628 shrinkhi[1] = boxhi[1];
629 }
630 if (shrinklo[2] == shrinkhi[2]) {
631 shrinklo[2] = boxlo[2];
632 shrinkhi[2] = boxhi[2];
633 }
634
635 // invoke RCB
636 // then invert() to create list of proc assignments for my atoms
637 // if triclinic, RCB operates on lamda coords
638 // NOTE: (3/2017) can remove undocumented "old" option at some point
639 // ditto in rcb.cpp, or make it an option
640
641 if (oldrcb) {
642 if (wtflag) {
643 weight = fixstore->vstore;
644 rcb->compute_old(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
645 } else rcb->compute_old(dim,atom->nlocal,atom->x,nullptr,shrinklo,shrinkhi);
646 } else {
647 if (wtflag) {
648 weight = fixstore->vstore;
649 rcb->compute(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
650 } else rcb->compute(dim,atom->nlocal,atom->x,nullptr,shrinklo,shrinkhi);
651 }
652
653 if (triclinic) domain->lamda2x(nlocal);
654
655 rcb->invert(sortflag);
656
657 // reset RCB lo/hi bounding box to full simulation box as needed
658
659 double *lo = rcb->lo;
660 double *hi = rcb->hi;
661
662 if (lo[0] == shrinklo[0]) lo[0] = boxlo[0];
663 if (lo[1] == shrinklo[1]) lo[1] = boxlo[1];
664 if (lo[2] == shrinklo[2]) lo[2] = boxlo[2];
665 if (hi[0] == shrinkhi[0]) hi[0] = boxhi[0];
666 if (hi[1] == shrinkhi[1]) hi[1] = boxhi[1];
667 if (hi[2] == shrinkhi[2]) hi[2] = boxhi[2];
668
669 // store RCB cut, dim, lo/hi box in CommTiled
670 // cut and lo/hi need to be in fractional form so can
671 // OK if changes by epsilon from what RCB used since atoms
672 // will subsequently migrate to new owning procs by exchange() anyway
673 // ditto for atoms exactly on lo/hi RCB box boundaries due to ties
674
675 comm->rcbnew = 1;
676
677 int idim = rcb->cutdim;
678 if (idim >= 0) comm->rcbcutfrac = (rcb->cut - boxlo[idim]) / prd[idim];
679 else comm->rcbcutfrac = 0.0;
680 comm->rcbcutdim = idim;
681
682 double (*mysplit)[2] = comm->mysplit;
683
684 mysplit[0][0] = (lo[0] - boxlo[0]) / prd[0];
685 if (hi[0] == boxhi[0]) mysplit[0][1] = 1.0;
686 else mysplit[0][1] = (hi[0] - boxlo[0]) / prd[0];
687
688 mysplit[1][0] = (lo[1] - boxlo[1]) / prd[1];
689 if (hi[1] == boxhi[1]) mysplit[1][1] = 1.0;
690 else mysplit[1][1] = (hi[1] - boxlo[1]) / prd[1];
691
692 mysplit[2][0] = (lo[2] - boxlo[2]) / prd[2];
693 if (hi[2] == boxhi[2]) mysplit[2][1] = 1.0;
694 else mysplit[2][1] = (hi[2] - boxlo[2]) / prd[2];
695
696 // return list of procs to send my atoms to
697
698 return rcb->sendproc;
699 }
700
701 /* ----------------------------------------------------------------------
702 setup static load balance operations
703 called from command and indirectly initially from fix balance
704 set rho = 0 for static balancing
705 ------------------------------------------------------------------------- */
706
shift_setup_static(char * str)707 void Balance::shift_setup_static(char *str)
708 {
709 shift_allocate = 1;
710
711 memory->create(proccost,nprocs,"balance:proccost");
712 memory->create(allproccost,nprocs,"balance:allproccost");
713
714 ndim = strlen(str);
715 bdim = new int[ndim];
716
717 for (int i = 0; i < ndim; i++) {
718 if (str[i] == 'x') bdim[i] = X;
719 if (str[i] == 'y') bdim[i] = Y;
720 if (str[i] == 'z') bdim[i] = Z;
721 }
722
723 int max = MAX(comm->procgrid[0],comm->procgrid[1]);
724 max = MAX(max,comm->procgrid[2]);
725
726 onecost = new double[max];
727 allcost = new double[max];
728 sum = new double[max+1];
729 target = new double[max+1];
730 lo = new double[max+1];
731 hi = new double[max+1];
732 losum = new double[max+1];
733 hisum = new double[max+1];
734
735 // if current layout is TILED, set initial uniform splits in Comm
736 // this gives starting point to subsequent shift balancing
737
738 if (comm->layout == Comm::LAYOUT_TILED) {
739 int *procgrid = comm->procgrid;
740 double *xsplit = comm->xsplit;
741 double *ysplit = comm->ysplit;
742 double *zsplit = comm->zsplit;
743
744 for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
745 for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
746 for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
747 xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
748 }
749
750 rho = 0;
751 }
752
753 /* ----------------------------------------------------------------------
754 setup shift load balance operations
755 called from fix balance
756 set rho = 1 to do dynamic balancing after call to shift_setup_static()
757 ------------------------------------------------------------------------- */
758
shift_setup(char * str,int nitermax_in,double thresh_in)759 void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
760 {
761 shift_setup_static(str);
762 nitermax = nitermax_in;
763 stopthresh = thresh_in;
764 rho = 1;
765 }
766
767 /* ----------------------------------------------------------------------
768 load balance by changing xyz split proc boundaries in Comm
769 called one time from input script command or many times from fix balance
770 return niter = iteration count
771 ------------------------------------------------------------------------- */
772
shift()773 int Balance::shift()
774 {
775 int i,j,k,m,np;
776 double mycost,totalcost,boxsize;
777 double *split;
778
779 // no balancing if no atoms
780
781 bigint natoms = atom->natoms;
782 if (natoms == 0) return 0;
783
784 // set delta for 1d balancing = root of threshold
785 // root = # of dimensions being balanced on
786
787 double delta = pow(stopthresh,1.0/ndim) - 1.0;
788 int *procgrid = comm->procgrid;
789
790 // all balancing done in lamda coords
791
792 domain->x2lamda(atom->nlocal);
793
794 // loop over dimensions in balance string
795
796 double *prd = domain->prd;
797
798 int niter = 0;
799 for (int idim = 0; idim < ndim; idim++) {
800
801 // split = ptr to xyz split in Comm
802
803 if (bdim[idim] == X) {
804 split = comm->xsplit;
805 boxsize = prd[0];
806 } else if (bdim[idim] == Y) {
807 split = comm->ysplit;
808 boxsize = prd[1];
809 } else if (bdim[idim] == Z) {
810 split = comm->zsplit;
811 boxsize = prd[2];
812 } else continue;
813
814 // initial count and sum
815
816 np = procgrid[bdim[idim]];
817 tally(bdim[idim],np,split);
818
819 // target[i] = desired sum at split I
820
821 if (wtflag) {
822 weight = fixstore->vstore;
823 int nlocal = atom->nlocal;
824 mycost = 0.0;
825 for (i = 0; i < nlocal; i++) mycost += weight[i];
826 } else mycost = atom->nlocal;
827
828 MPI_Allreduce(&mycost,&totalcost,1,MPI_DOUBLE,MPI_SUM,world);
829
830 for (i = 0; i < np; i++) target[i] = totalcost/np * i;
831 target[np] = totalcost;
832
833 // lo[i] = closest split <= split[i] with a sum <= target
834 // hi[i] = closest split >= split[i] with a sum >= target
835
836 lo[0] = hi[0] = 0.0;
837 lo[np] = hi[np] = 1.0;
838 losum[0] = hisum[0] = 0.0;
839 losum[np] = hisum[np] = totalcost;
840
841 for (i = 1; i < np; i++) {
842 for (j = i; j >= 0; j--)
843 if (sum[j] <= target[i]) {
844 lo[i] = split[j];
845 losum[i] = sum[j];
846 break;
847 }
848 for (j = i; j <= np; j++)
849 if (sum[j] >= target[i]) {
850 hi[i] = split[j];
851 hisum[i] = sum[j];
852 break;
853 }
854 }
855
856 // iterate until balanced
857
858 #ifdef BALANCE_DEBUG
859 if (me == 0) debug_shift_output(idim,0,np,split);
860 #endif
861
862 int doneflag;
863 int change = 1;
864 for (m = 0; m < nitermax; m++) {
865 change = adjust(np,split);
866 tally(bdim[idim],np,split);
867 niter++;
868
869 #ifdef BALANCE_DEBUG
870 if (me == 0) debug_shift_output(idim,m+1,np,split);
871 if (outflag) dumpout(update->ntimestep);
872 #endif
873
874 // stop if no change in splits, b/c all targets are met exactly
875
876 if (!change) break;
877
878 // stop if all split sums are within delta of targets
879 // this is a 1d test of particle count per slice
880 // assumption is that this is sufficient accuracy
881 // for 3d imbalance factor to reach threshold
882
883 doneflag = 1;
884 for (i = 1; i < np; i++)
885 if (fabs(1.0*(sum[i]-target[i]))/target[i] > delta) doneflag = 0;
886 if (doneflag) break;
887 }
888
889 // eliminate final adjacent splits that are duplicates
890 // can happen if particle distribution is narrow and Nitermax is small
891 // set lo = midpt between splits
892 // spread duplicates out evenly between bounding midpts with non-duplicates
893 // i,j = lo/hi indices of set of duplicate splits
894 // delta = new spacing between duplicates
895 // bounding midpts = lo[i-1] and lo[j]
896
897 int duplicate = 0;
898 for (i = 1; i < np-1; i++)
899 if (split[i] == split[i+1]) duplicate = 1;
900 if (duplicate) {
901 for (i = 0; i < np; i++)
902 lo[i] = 0.5 * (split[i] + split[i+1]);
903 i = 1;
904 while (i < np-1) {
905 j = i+1;
906 while (split[j] == split[i]) j++;
907 j--;
908 if (j > i) {
909 delta = (lo[j] - lo[i-1]) / (j-i+2);
910 for (k = i; k <= j; k++)
911 split[k] = lo[i-1] + (k-i+1)*delta;
912 }
913 i = j+1;
914 }
915 }
916
917 // adjust adjacent splits that are too close (within neigh skin)
918 // do this with minimal adjustment to splits
919
920 double close = (1.0+EPSNEIGH) * neighbor->skin / boxsize;
921 double midpt,start,stop,lbound,ubound,spacing;
922
923 i = 0;
924 while (i < np) {
925 if (split[i+1] - split[i] < close) {
926 j = i+1;
927
928 // I,J = set of consecutive splits that are collectively too close
929 // if can expand set and not become too close to splits I-1 or J+1, do it
930 // else add split I-1 or J+1 to set and try again
931 // delta = size of expanded split set that will satisy criterion
932
933 while (1) {
934 delta = (j-i) * close;
935 midpt = 0.5 * (split[i]+split[j]);
936 start = midpt - 0.5*delta;
937 stop = midpt + 0.5*delta;
938
939 if (i > 0) lbound = split[i-1] + close;
940 else lbound = 0.0;
941 if (j < np) ubound = split[j+1] - close;
942 else ubound = 1.0;
943
944 // start/stop are within bounds, reset the splits
945
946 if (start >= lbound && stop <= ubound) break;
947
948 // try a shift to either bound, reset the splits if delta fits
949 // these tests change start/stop
950
951 if (start < lbound) {
952 start = lbound;
953 stop = start + delta;
954 if (stop <= ubound) break;
955 } else if (stop > ubound) {
956 stop = ubound;
957 start = stop - delta;
958 if (start >= lbound) break;
959 }
960
961 // delta does not fit between lbound and ubound
962 // exit if can't expand set, else expand set
963 // if can expand in either direction,
964 // pick new split closest to current midpt of set
965
966 if (i == 0 && j == np) {
967 start = 0.0; stop = 1.0;
968 break;
969 }
970 if (i == 0) j++;
971 else if (j == np) i--;
972 else if (midpt-lbound < ubound-midpt) i--;
973 else j++;
974 }
975
976 // reset all splits between I,J inclusive to be equi-spaced
977
978 spacing = (stop-start) / (j-i);
979 for (m = i; m <= j; m++)
980 split[m] = start + (m-i)*spacing;
981 if (j == np) split[np] = 1.0;
982
983 // continue testing beyond the J split
984
985 i = j+1;
986 } else i++;
987 }
988
989 // sanity check on bad duplicate or inverted splits
990 // zero or negative width sub-domains will break Comm class
991 // should never happen if recursive multisection algorithm is correct
992
993 int bad = 0;
994 for (i = 0; i < np; i++)
995 if (split[i] >= split[i+1]) bad = 1;
996 if (bad) error->all(FLERR,"Balance produced bad splits");
997
998 // stop at this point in bstr if imbalance factor < threshold
999 // this is a true 3d test of particle count per processor
1000
1001 double imbfactor = imbalance_splits();
1002 if (imbfactor <= stopthresh) break;
1003 }
1004
1005 // restore real coords
1006
1007 domain->lamda2x(atom->nlocal);
1008
1009 return niter;
1010 }
1011
1012 /* ----------------------------------------------------------------------
1013 count atoms in each slice, based on their dim coordinate
1014 N = # of slices
1015 split = N+1 cuts between N slices
1016 return updated count = particles per slice
1017 return updated sum = cumulative count below each of N+1 splits
1018 use binary search to find which slice each atom is in
1019 ------------------------------------------------------------------------- */
1020
tally(int dim,int n,double * split)1021 void Balance::tally(int dim, int n, double *split)
1022 {
1023 for (int i = 0; i < n; i++) onecost[i] = 0.0;
1024
1025 double **x = atom->x;
1026 int nlocal = atom->nlocal;
1027 int index;
1028
1029 if (wtflag) {
1030 weight = fixstore->vstore;
1031 for (int i = 0; i < nlocal; i++) {
1032 index = utils::binary_search(x[i][dim],n,split);
1033 onecost[index] += weight[i];
1034 }
1035 } else {
1036 for (int i = 0; i < nlocal; i++) {
1037 index = utils::binary_search(x[i][dim],n,split);
1038 onecost[index] += 1.0;
1039 }
1040 }
1041
1042 MPI_Allreduce(onecost,allcost,n,MPI_DOUBLE,MPI_SUM,world);
1043
1044 sum[0] = 0.0;
1045 for (int i = 1; i < n+1; i++)
1046 sum[i] = sum[i-1] + allcost[i-1];
1047 }
1048
1049 /* ----------------------------------------------------------------------
1050 adjust cuts between N slices in a dim via recursive multisectioning method
1051 split = current N+1 cuts, with 0.0 and 1.0 at end points
1052 sum = cumulative count up to each split
1053 target = desired cumulative count up to each split
1054 lo/hi = split values that bound current split
1055 update lo/hi to reflect sums at current split values
1056 overwrite split with new cuts
1057 guaranteed that splits will remain in ascending order,
1058 though adjacent values may be identical
1059 recursive bisectioning zooms in on each cut by halving lo/hi
1060 return 0 if no changes in any splits, b/c they are all perfect
1061 ------------------------------------------------------------------------- */
1062
adjust(int n,double * split)1063 int Balance::adjust(int n, double *split)
1064 {
1065 int i;
1066 double fraction;
1067
1068 // reset lo/hi based on current sum and splits
1069 // insure lo is monotonically increasing, ties are OK
1070 // insure hi is monotonically decreasing, ties are OK
1071 // this effectively uses info from nearby splits
1072 // to possibly tighten bounds on lo/hi
1073
1074 for (i = 1; i < n; i++) {
1075 if (sum[i] <= target[i]) {
1076 lo[i] = split[i];
1077 losum[i] = sum[i];
1078 }
1079 if (sum[i] >= target[i]) {
1080 hi[i] = split[i];
1081 hisum[i] = sum[i];
1082 }
1083 }
1084 for (i = 1; i < n; i++)
1085 if (lo[i] < lo[i-1]) {
1086 lo[i] = lo[i-1];
1087 losum[i] = losum[i-1];
1088 }
1089 for (i = n-1; i > 0; i--)
1090 if (hi[i] > hi[i+1]) {
1091 hi[i] = hi[i+1];
1092 hisum[i] = hisum[i+1];
1093 }
1094
1095 int change = 0;
1096 for (i = 1; i < n; i++)
1097 if (sum[i] != target[i]) {
1098 change = 1;
1099 if (rho == 0) split[i] = 0.5 * (lo[i]+hi[i]);
1100 else {
1101 fraction = 1.0*(target[i]-losum[i]) / (hisum[i]-losum[i]);
1102 split[i] = lo[i] + fraction * (hi[i]-lo[i]);
1103 }
1104 }
1105 return change;
1106 }
1107
1108 /* ----------------------------------------------------------------------
1109 calculate imbalance based on processor splits in 3 dims
1110 atoms must be in lamda coords (0-1) before called
1111 map particles to 3d grid of procs
1112 return imbalance factor = max load per proc / ave load per proc
1113 ------------------------------------------------------------------------- */
1114
imbalance_splits()1115 double Balance::imbalance_splits()
1116 {
1117 double *xsplit = comm->xsplit;
1118 double *ysplit = comm->ysplit;
1119 double *zsplit = comm->zsplit;
1120
1121 int nx = comm->procgrid[0];
1122 int ny = comm->procgrid[1];
1123 int nz = comm->procgrid[2];
1124
1125 for (int i = 0; i < nprocs; i++) proccost[i] = 0.0;
1126
1127 double **x = atom->x;
1128 int nlocal = atom->nlocal;
1129 int ix,iy,iz;
1130
1131 if (wtflag) {
1132 weight = fixstore->vstore;
1133 for (int i = 0; i < nlocal; i++) {
1134 ix = utils::binary_search(x[i][0],nx,xsplit);
1135 iy = utils::binary_search(x[i][1],ny,ysplit);
1136 iz = utils::binary_search(x[i][2],nz,zsplit);
1137 proccost[iz*nx*ny + iy*nx + ix] += weight[i];
1138 }
1139 } else {
1140 for (int i = 0; i < nlocal; i++) {
1141 ix = utils::binary_search(x[i][0],nx,xsplit);
1142 iy = utils::binary_search(x[i][1],ny,ysplit);
1143 iz = utils::binary_search(x[i][2],nz,zsplit);
1144 proccost[iz*nx*ny + iy*nx + ix] += 1.0;
1145 }
1146 }
1147
1148 // one proc's particles may map to many partitions, so must Allreduce
1149
1150 MPI_Allreduce(proccost,allproccost,nprocs,MPI_DOUBLE,MPI_SUM,world);
1151
1152 double maxcost = 0.0;
1153 double totalcost = 0.0;
1154 for (int i = 0; i < nprocs; i++) {
1155 maxcost = MAX(maxcost,allproccost[i]);
1156 totalcost += allproccost[i];
1157 }
1158
1159 double imbalance = 1.0;
1160 if (maxcost > 0.0) imbalance = maxcost / (totalcost/nprocs);
1161 return imbalance;
1162 }
1163
1164 /* ----------------------------------------------------------------------
1165 write dump snapshot of line segments in Pizza.py mdump mesh format
1166 write xy lines around each proc's sub-domain for 2d
1167 write xyz cubes around each proc's sub-domain for 3d
1168 only called by proc 0
1169 NOTE: only implemented for orthogonal boxes, not triclinic
1170 ------------------------------------------------------------------------- */
1171
dumpout(bigint tstep)1172 void Balance::dumpout(bigint tstep)
1173 {
1174 int dimension = domain->dimension;
1175 int triclinic = domain->triclinic;
1176
1177 // Allgather each proc's sub-box
1178 // could use Gather, but that requires MPI to alloc memory
1179
1180 double *lo,*hi;
1181 if (triclinic == 0) {
1182 lo = domain->sublo;
1183 hi = domain->subhi;
1184 } else {
1185 lo = domain->sublo_lamda;
1186 hi = domain->subhi_lamda;
1187 }
1188
1189 double box[6];
1190 box[0] = lo[0]; box[1] = lo[1]; box[2] = lo[2];
1191 box[3] = hi[0]; box[4] = hi[1]; box[5] = hi[2];
1192
1193 double **boxall;
1194 memory->create(boxall,nprocs,6,"balance:dumpout");
1195 MPI_Allgather(box,6,MPI_DOUBLE,&boxall[0][0],6,MPI_DOUBLE,world);
1196
1197 if (me) {
1198 memory->destroy(boxall);
1199 return;
1200 }
1201
1202 // proc 0 writes out nodal coords
1203 // some will be duplicates
1204
1205 double *boxlo = domain->boxlo;
1206 double *boxhi = domain->boxhi;
1207
1208 fmt::print(fp,"ITEM: TIMESTEP\n{}\n",tstep);
1209 fprintf(fp,"ITEM: NUMBER OF NODES\n");
1210 if (dimension == 2) fprintf(fp,"%d\n",4*nprocs);
1211 else fprintf(fp,"%d\n",8*nprocs);
1212 fprintf(fp,"ITEM: BOX BOUNDS\n");
1213 fprintf(fp,"%g %g\n",boxlo[0],boxhi[0]);
1214 fprintf(fp,"%g %g\n",boxlo[1],boxhi[1]);
1215 fprintf(fp,"%g %g\n",boxlo[2],boxhi[2]);
1216 fprintf(fp,"ITEM: NODES\n");
1217
1218 if (triclinic == 0) {
1219 if (dimension == 2) {
1220 int m = 0;
1221 for (int i = 0; i < nprocs; i++) {
1222 fprintf(fp,"%d %d %g %g %g\n",m+1,1,boxall[i][0],boxall[i][1],0.0);
1223 fprintf(fp,"%d %d %g %g %g\n",m+2,1,boxall[i][3],boxall[i][1],0.0);
1224 fprintf(fp,"%d %d %g %g %g\n",m+3,1,boxall[i][3],boxall[i][4],0.0);
1225 fprintf(fp,"%d %d %g %g %g\n",m+4,1,boxall[i][0],boxall[i][4],0.0);
1226 m += 4;
1227 }
1228 } else {
1229 int m = 0;
1230 for (int i = 0; i < nprocs; i++) {
1231 fprintf(fp,"%d %d %g %g %g\n",m+1,1,
1232 boxall[i][0],boxall[i][1],boxall[i][2]);
1233 fprintf(fp,"%d %d %g %g %g\n",m+2,1,
1234 boxall[i][3],boxall[i][1],boxall[i][2]);
1235 fprintf(fp,"%d %d %g %g %g\n",m+3,1,
1236 boxall[i][3],boxall[i][4],boxall[i][2]);
1237 fprintf(fp,"%d %d %g %g %g\n",m+4,1,
1238 boxall[i][0],boxall[i][4],boxall[i][2]);
1239 fprintf(fp,"%d %d %g %g %g\n",m+5,1,
1240 boxall[i][0],boxall[i][1],boxall[i][5]);
1241 fprintf(fp,"%d %d %g %g %g\n",m+6,1,
1242 boxall[i][3],boxall[i][1],boxall[i][5]);
1243 fprintf(fp,"%d %d %g %g %g\n",m+7,1,
1244 boxall[i][3],boxall[i][4],boxall[i][5]);
1245 fprintf(fp,"%d %d %g %g %g\n",m+8,1,
1246 boxall[i][0],boxall[i][4],boxall[i][5]);
1247 m += 8;
1248 }
1249 }
1250
1251 } else {
1252 double (*bc)[3] = domain->corners;
1253
1254 if (dimension == 2) {
1255 int m = 0;
1256 for (int i = 0; i < nprocs; i++) {
1257 domain->lamda_box_corners(&boxall[i][0],&boxall[i][3]);
1258 fprintf(fp,"%d %d %g %g %g\n",m+1,1,bc[0][0],bc[0][1],0.0);
1259 fprintf(fp,"%d %d %g %g %g\n",m+2,1,bc[1][0],bc[1][1],0.0);
1260 fprintf(fp,"%d %d %g %g %g\n",m+3,1,bc[2][0],bc[2][1],0.0);
1261 fprintf(fp,"%d %d %g %g %g\n",m+4,1,bc[3][0],bc[3][1],0.0);
1262 m += 4;
1263 }
1264 } else {
1265 int m = 0;
1266 for (int i = 0; i < nprocs; i++) {
1267 domain->lamda_box_corners(&boxall[i][0],&boxall[i][3]);
1268 fprintf(fp,"%d %d %g %g %g\n",m+1,1,bc[0][0],bc[0][1],bc[0][2]);
1269 fprintf(fp,"%d %d %g %g %g\n",m+2,1,bc[1][0],bc[1][1],bc[1][2]);
1270 fprintf(fp,"%d %d %g %g %g\n",m+3,1,bc[2][0],bc[2][1],bc[2][2]);
1271 fprintf(fp,"%d %d %g %g %g\n",m+4,1,bc[3][0],bc[3][1],bc[3][2]);
1272 fprintf(fp,"%d %d %g %g %g\n",m+5,1,bc[4][0],bc[4][1],bc[4][2]);
1273 fprintf(fp,"%d %d %g %g %g\n",m+6,1,bc[5][0],bc[5][1],bc[5][2]);
1274 fprintf(fp,"%d %d %g %g %g\n",m+7,1,bc[6][0],bc[6][1],bc[6][2]);
1275 fprintf(fp,"%d %d %g %g %g\n",m+8,1,bc[7][0],bc[7][1],bc[7][2]);
1276 m += 8;
1277 }
1278 }
1279 }
1280
1281 // write out one square/cube per processor for 2d/3d
1282
1283 fmt::print(fp,"ITEM: TIMESTEP\n{}\n",tstep);
1284 if (dimension == 2) fprintf(fp,"ITEM: NUMBER OF SQUARES\n");
1285 else fprintf(fp,"ITEM: NUMBER OF CUBES\n");
1286 fprintf(fp,"%d\n",nprocs);
1287 if (dimension == 2) fprintf(fp,"ITEM: SQUARES\n");
1288 else fprintf(fp,"ITEM: CUBES\n");
1289
1290 if (dimension == 2) {
1291 int m = 0;
1292 for (int i = 0; i < nprocs; i++) {
1293 fprintf(fp,"%d %d %d %d %d %d\n",i+1,1,m+1,m+2,m+3,m+4);
1294 m += 4;
1295 }
1296 } else {
1297 int m = 0;
1298 for (int i = 0; i < nprocs; i++) {
1299 fprintf(fp,"%d %d %d %d %d %d %d %d %d %d\n",
1300 i+1,1,m+1,m+2,m+3,m+4,m+5,m+6,m+7,m+8);
1301 m += 8;
1302 }
1303 }
1304
1305 memory->destroy(boxall);
1306 }
1307
1308 /* ----------------------------------------------------------------------
1309 debug output for Idim and count
1310 only called by proc 0
1311 ------------------------------------------------------------------------- */
1312
1313 #ifdef BALANCE_DEBUG
debug_shift_output(int idim,int m,int np,double * split)1314 void Balance::debug_shift_output(int idim, int m, int np, double *split)
1315 {
1316 int i;
1317 const char *dim = nullptr;
1318
1319 double *boxlo = domain->boxlo;
1320 double *prd = domain->prd;
1321
1322 if (bdim[idim] == X) dim = "X";
1323 else if (bdim[idim] == Y) dim = "Y";
1324 else if (bdim[idim] == Z) dim = "Z";
1325 fprintf(stderr,"Dimension %s, Iteration %d\n",dim,m);
1326
1327 fprintf(stderr," Count:");
1328 for (i = 0; i <= np; i++) fmt::print(stderr," {}",count[i]);
1329 fprintf(stderr,"\n");
1330 fprintf(stderr," Sum:");
1331 for (i = 0; i <= np; i++) fmt::print(stderr," {}",sum[i]);
1332 fprintf(stderr,"\n");
1333 fprintf(stderr," Target:");
1334 for (i = 0; i <= np; i++) fmt::print(stderr," {}",target[i]);
1335 fprintf(stderr,"\n");
1336 fprintf(stderr," Actual cut:");
1337 for (i = 0; i <= np; i++)
1338 fprintf(stderr," %g",boxlo[bdim[idim]] + split[i]*prd[bdim[idim]]);
1339 fprintf(stderr,"\n");
1340 fprintf(stderr," Split:");
1341 for (i = 0; i <= np; i++) fprintf(stderr," %g",split[i]);
1342 fprintf(stderr,"\n");
1343 fprintf(stderr," Low:");
1344 for (i = 0; i <= np; i++) fprintf(stderr," %g",lo[i]);
1345 fprintf(stderr,"\n");
1346 fprintf(stderr," Low-sum:");
1347 for (i = 0; i <= np; i++) fmt::print(stderr," {}",losum[i]);
1348 fprintf(stderr,"\n");
1349 fprintf(stderr," Hi:");
1350 for (i = 0; i <= np; i++) fprintf(stderr," %g",hi[i]);
1351 fprintf(stderr,"\n");
1352 fprintf(stderr," Hi-sum:");
1353 for (i = 0; i <= np; i++) fmt::print(stderr," {}",hisum[i]);
1354 fprintf(stderr,"\n");
1355 fprintf(stderr," Delta:");
1356 for (i = 0; i < np; i++) fprintf(stderr," %g",split[i+1]-split[i]);
1357 fprintf(stderr,"\n");
1358 }
1359 #endif
1360