// clang-format off
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author (multi and triclinic support):
     Adrian Diaz (University of Florida)
------------------------------------------------------------------------- */

#include "comm_tiled.h"

#include "atom.h"
#include "atom_vec.h"
#include "compute.h"
#include "domain.h"
#include "dump.h"
#include "error.h"
#include "fix.h"
#include "memory.h"
#include "neighbor.h"
#include "pair.h"

#include <cmath>
#include <cstring>

using namespace LAMMPS_NS;

#define BUFFACTOR 1.5
#define BUFMIN 1024
#define EPSILON 1.0e-6

#define DELTA_PROCS 16

/* ---------------------------------------------------------------------- */

CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp)
{
  style = 1;
  layout = Comm::LAYOUT_UNIFORM;
  pbc_flag = nullptr;
  buf_send = nullptr;
  buf_recv = nullptr;
  overlap = nullptr;
  rcbinfo = nullptr;
  cutghostmulti = nullptr;
  cutghostmultiold = nullptr;
  init_buffers();
}

/* ---------------------------------------------------------------------- */
//IMPORTANT: we *MUST* pass "*oldcomm" to the Comm initializer here, as
//           the code below *requires* that the (implicit) copy constructor
//           for Comm is run and thus creates a shallow copy of "oldcomm".
//           The call to Comm::copy_arrays() then converts the shallow copy
//           into a deep copy of the class with the new layout.

CommTiled::CommTiled(LAMMPS * /*lmp*/, Comm *oldcomm) : Comm(*oldcomm)
{
  style = 1;
  layout = oldcomm->layout;
  Comm::copy_arrays(oldcomm);
  init_buffers();
}
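
/* ----------------------------------------------------------------------
   a minimal stand-alone sketch of the copy idiom used above, with
   hypothetical Base/Derived names (not LAMMPS classes): the base-class
   copy constructor produces a shallow copy, and an explicit deep-copy
   helper then replaces the shared pointers

     struct Base {
       int n;
       int *data;                      // shallow-copied by Base(const Base &)
       void copy_arrays(Base *src) {   // convert shallow copy to deep copy
         data = new int[n];
         memcpy(data, src->data, n * sizeof(int));
       }
     };

     struct Derived : Base {
       Derived(Base *old) : Base(*old) { copy_arrays(old); }
     };
------------------------------------------------------------------------- */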

/* ---------------------------------------------------------------------- */

CommTiled::~CommTiled()
{
  memory->destroy(buf_send);
  memory->destroy(buf_recv);
  memory->destroy(overlap);
  deallocate_swap(maxswap);
  memory->sfree(rcbinfo);
  memory->destroy(cutghostmulti);
  memory->destroy(cutghostmultiold);
}

/* ----------------------------------------------------------------------
   initialize comm buffers and other data structs local to CommTiled
------------------------------------------------------------------------- */

void CommTiled::init_buffers()
{
  buf_send = buf_recv = nullptr;
  maxsend = maxrecv = BUFMIN;
  grow_send(maxsend,2);
  memory->create(buf_recv,maxrecv,"comm:buf_recv");

  maxoverlap = 0;
  overlap = nullptr;
  rcbinfo = nullptr;
  cutghostmulti = nullptr;
  cutghostmultiold = nullptr;
  sendbox_multi = nullptr;
  sendbox_multiold = nullptr;

  // Note this may skip growing multi arrays, will call again in init()
  maxswap = 6;
  allocate_swap(maxswap);
}

/* ---------------------------------------------------------------------- */

void CommTiled::init()
{
  Comm::init();

  // cannot set nswap in init_buffers() b/c
  // dimension command can be after comm_style command

  nswap = 2*domain->dimension;

  memory->destroy(cutghostmulti);
  if (mode == Comm::MULTI) {
    // If inconsistent # of collections, destroy any preexisting arrays (may be missized)
    if (ncollections != neighbor->ncollections) {
      ncollections = neighbor->ncollections;
    }

    // delete any old user cutoffs if # of collections changed
    if (me == 0 && cutusermulti && ncollections != ncollections_cutoff)
      error->warning(FLERR, "cutoff/multi settings discarded, must be defined"
                            " after customizing collections in neigh_modify");
    if (cutusermulti && ncollections != ncollections_cutoff) {
      memory->destroy(cutusermulti);
      cutusermulti = nullptr;
    }

    // grow sendbox_multi now that ncollections is known
    for (int i = 0; i < maxswap; i++)
      grow_swap_send_multi(i,DELTA_PROCS);

    memory->create(cutghostmulti,ncollections,3,"comm:cutghostmulti");
  }

  memory->destroy(cutghostmultiold);
  if (mode == Comm::MULTIOLD)
    memory->create(cutghostmultiold,atom->ntypes+1,3,"comm:cutghostmultiold");

  int bufextra_old = bufextra;
  init_exchange();
  if (bufextra > bufextra_old) grow_send(maxsend+bufextra,2);
}

/* ----------------------------------------------------------------------
   setup spatial-decomposition communication patterns
   function of neighbor cutoff(s) & cutghostuser & current box size and tiling
------------------------------------------------------------------------- */

void CommTiled::setup()
{
  int i,j,n;

  // domain properties used in setup method and methods it calls

  dimension = domain->dimension;
  int *periodicity = domain->periodicity;
  int ntypes = atom->ntypes;

  if (triclinic == 0) {
    prd = domain->prd;
    boxlo = domain->boxlo;
    boxhi = domain->boxhi;
    sublo = domain->sublo;
    subhi = domain->subhi;
  } else {
    prd = domain->prd_lamda;
    boxlo = domain->boxlo_lamda;
    boxhi = domain->boxhi_lamda;
    sublo = domain->sublo_lamda;
    subhi = domain->subhi_lamda;
  }

  // set function pointers

  if (layout != Comm::LAYOUT_TILED) {
    box_drop = &CommTiled::box_drop_brick;
    box_other = &CommTiled::box_other_brick;
    box_touch = &CommTiled::box_touch_brick;
    point_drop = &CommTiled::point_drop_brick;
  } else {
    box_drop = &CommTiled::box_drop_tiled;
    box_other = &CommTiled::box_other_tiled;
    box_touch = &CommTiled::box_touch_tiled;
    point_drop = &CommTiled::point_drop_tiled;
  }

  // if RCB decomp exists and just changed, gather needed global RCB info

  if (layout == Comm::LAYOUT_TILED) coord2proc_setup();

  // set cutoff for comm forward and comm reverse
  // check that cutoff < any periodic box length

  if (mode == Comm::MULTI) {
    double **cutcollectionsq = neighbor->cutcollectionsq;

    // build collection array for atom exchange
    neighbor->build_collection(0);

    // If using multi/reduce, communicate particles a distance equal
    // to the max cutoff with equally sized or smaller collections
    // If not, communicate the maximum cutoff of the entire collection
    for (i = 0; i < ncollections; i++) {
      if (cutusermulti) {
        cutghostmulti[i][0] = cutusermulti[i];
        cutghostmulti[i][1] = cutusermulti[i];
        cutghostmulti[i][2] = cutusermulti[i];
      } else {
        cutghostmulti[i][0] = 0.0;
        cutghostmulti[i][1] = 0.0;
        cutghostmulti[i][2] = 0.0;
      }

      for (j = 0; j < ncollections; j++) {
        if (multi_reduce && (cutcollectionsq[j][j] > cutcollectionsq[i][i])) continue;
        cutghostmulti[i][0] = MAX(cutghostmulti[i][0],sqrt(cutcollectionsq[i][j]));
        cutghostmulti[i][1] = MAX(cutghostmulti[i][1],sqrt(cutcollectionsq[i][j]));
        cutghostmulti[i][2] = MAX(cutghostmulti[i][2],sqrt(cutcollectionsq[i][j]));
      }
    }
  }
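
  // worked illustration of the loop above (hedged, hypothetical numbers):
  // with two collections whose self cutoffs are 1.0 and 3.0 and a mixed
  // cutoff of 2.0, i.e. cutcollectionsq = {{1,4},{4,9}}:
  //   multi_reduce off: cutghostmulti[0] = max(1,2) = 2, cutghostmulti[1] = 3
  //   multi_reduce on:  collection 0 skips j = 1 (9 > 1) and ghosts only
  //     to 1.0, while collection 1 keeps both j and still ghosts to 3.0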

  if (mode == Comm::MULTIOLD) {
    double *cuttype = neighbor->cuttype;
    for (i = 1; i <= ntypes; i++) {
      double tmp = 0.0;
      if (cutusermultiold) tmp = cutusermultiold[i];
      cutghostmultiold[i][0] = MAX(tmp,cuttype[i]);
      cutghostmultiold[i][1] = MAX(tmp,cuttype[i]);
      cutghostmultiold[i][2] = MAX(tmp,cuttype[i]);
    }
  }

  double cut = get_comm_cutoff();
  if ((cut == 0.0) && (me == 0))
    error->warning(FLERR,"Communication cutoff is 0.0. No ghost atoms "
                   "will be generated. Atoms may get lost.");

  if (triclinic == 0) cutghost[0] = cutghost[1] = cutghost[2] = cut;
  else {
    double *h_inv = domain->h_inv;
    double length0,length1,length2;
    length0 = sqrt(h_inv[0]*h_inv[0] + h_inv[5]*h_inv[5] + h_inv[4]*h_inv[4]);
    cutghost[0] = cut * length0;
    length1 = sqrt(h_inv[1]*h_inv[1] + h_inv[3]*h_inv[3]);
    cutghost[1] = cut * length1;
    length2 = h_inv[2];
    cutghost[2] = cut * length2;
    if (mode == Comm::MULTI) {
      for (i = 0; i < ncollections; i++) {
        cutghostmulti[i][0] *= length0;
        cutghostmulti[i][1] *= length1;
        cutghostmulti[i][2] *= length2;
      }
    }

    if (mode == Comm::MULTIOLD) {
      for (i = 1; i <= ntypes; i++) {
        cutghostmultiold[i][0] *= length0;
        cutghostmultiold[i][1] *= length1;
        cutghostmultiold[i][2] *= length2;
      }
    }
  }

  if ((periodicity[0] && cutghost[0] > prd[0]) ||
      (periodicity[1] && cutghost[1] > prd[1]) ||
      (dimension == 3 && periodicity[2] && cutghost[2] > prd[2]))
    error->all(FLERR,"Communication cutoff for comm_style tiled "
               "cannot exceed periodic box length");

  // if cut = 0.0, set to epsilon to induce nearest neighbor comm
  // this is b/c sendproc is used below to infer touching exchange procs
  // exchange procs will be empty (leading to lost atoms) if sendproc = 0
  // will reset sendproc/etc to 0 after exchange is setup, down below

  int cutzero = 0;
  if (cut == 0.0) {
    cutzero = 1;
    cut = MIN(prd[0],prd[1]);
    if (dimension == 3) cut = MIN(cut,prd[2]);
    cut *= EPSILON;
    cutghost[0] = cutghost[1] = cutghost[2] = cut;
  }
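
  // e.g. (hedged illustration) for an orthogonal box with prd = (10,20,30)
  // the lines above give cut = 10*EPSILON = 1.0e-5: tiny but nonzero, so
  // box_drop() below still finds the touching neighbor procs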

  // setup forward/reverse communication
  // loop over 6 swap directions
  // determine which procs I will send to and receive from in each swap
  // done by intersecting ghost box with all proc sub-boxes it overlaps
  // sets nsendproc, nrecvproc, sendproc, recvproc
  // sets sendother, recvother, sendself, pbc_flag, pbc, sendbox
  // resets nprocmax
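  //
  // swap ordering below is iswap = 2*idim + idir, i.e.
  //   iswap 0,1 = x lower/upper, 2,3 = y lower/upper, 4,5 = z lower/upper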

  int noverlap1,indexme;
  double lo1[3],hi1[3],lo2[3],hi2[3];
  int one,two;

  int iswap = 0;
  for (int idim = 0; idim < dimension; idim++) {
    for (int idir = 0; idir < 2; idir++) {

      // one = first ghost box in same periodic image
      // two = second ghost box wrapped across periodic boundary
      // either may not exist

      one = 1;
      lo1[0] = sublo[0]; lo1[1] = sublo[1]; lo1[2] = sublo[2];
      hi1[0] = subhi[0]; hi1[1] = subhi[1]; hi1[2] = subhi[2];
      if (idir == 0) {
        lo1[idim] = sublo[idim] - cutghost[idim];
        hi1[idim] = sublo[idim];
      } else {
        lo1[idim] = subhi[idim];
        hi1[idim] = subhi[idim] + cutghost[idim];
      }

      two = 0;
      if (idir == 0 && periodicity[idim] && lo1[idim] < boxlo[idim]) two = 1;
      if (idir == 1 && periodicity[idim] && hi1[idim] > boxhi[idim]) two = 1;

      if (two) {
        lo2[0] = sublo[0]; lo2[1] = sublo[1]; lo2[2] = sublo[2];
        hi2[0] = subhi[0]; hi2[1] = subhi[1]; hi2[2] = subhi[2];
        if (idir == 0) {
          lo2[idim] = lo1[idim] + prd[idim];
          hi2[idim] = boxhi[idim];
          if (sublo[idim] == boxlo[idim]) one = 0;
        } else {
          lo2[idim] = boxlo[idim];
          hi2[idim] = hi1[idim] - prd[idim];
          if (subhi[idim] == boxhi[idim]) one = 0;
        }
      }

      if (one) {
        if (idir == 0) lo1[idim] = MAX(lo1[idim],boxlo[idim]);
        else hi1[idim] = MIN(hi1[idim],boxhi[idim]);
        if (lo1[idim] == hi1[idim]) one = 0;
      }
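
      // worked 1-d illustration (hedged, hypothetical numbers): for a
      // periodic box [0,1], sub-box [0.0,0.25], cutghost = 0.1, idir = 0:
      //   box one = [-0.1,0.0] extends past boxlo, so two = 1 and
      //   box two = [0.9,1.0] is the wrapped image; since sublo == boxlo,
      //   box one is then dropped entirely (one = 0)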

      // noverlap = # of overlaps of box1/2 with procs via box_drop()
      // overlap = list of overlapping procs
      // if overlap with self, indexme = index of me in list

      indexme = -1;
      noverlap = 0;
      if (one) (this->*box_drop)(idim,lo1,hi1,indexme);
      noverlap1 = noverlap;
      if (two) (this->*box_drop)(idim,lo2,hi2,indexme);

      // if self is in overlap list, move it to end of list

      if (indexme >= 0) {
        int tmp = overlap[noverlap-1];
        overlap[noverlap-1] = overlap[indexme];
        overlap[indexme] = tmp;
      }

      // reallocate 2nd dimensions of all send/recv arrays, based on noverlap
      // # of sends of this swap = # of recvs of swap iswap +/- 1

      if (noverlap > nprocmax[iswap]) {
        int oldmax = nprocmax[iswap];
        while (nprocmax[iswap] < noverlap) nprocmax[iswap] += DELTA_PROCS;
        grow_swap_send(iswap,nprocmax[iswap],oldmax);
        if (idir == 0) grow_swap_recv(iswap+1,nprocmax[iswap]);
        else grow_swap_recv(iswap-1,nprocmax[iswap]);
      }

      // overlap now has list of noverlap procs
      // includes PBC effects

      if (noverlap && overlap[noverlap-1] == me) sendself[iswap] = 1;
      else sendself[iswap] = 0;
      if (noverlap && noverlap-sendself[iswap]) sendother[iswap] = 1;
      else sendother[iswap] = 0;

      nsendproc[iswap] = noverlap;
      for (i = 0; i < noverlap; i++) sendproc[iswap][i] = overlap[i];

      if (idir == 0) {
        recvother[iswap+1] = sendother[iswap];
        nrecvproc[iswap+1] = noverlap;
        for (i = 0; i < noverlap; i++) recvproc[iswap+1][i] = overlap[i];
      } else {
        recvother[iswap-1] = sendother[iswap];
        nrecvproc[iswap-1] = noverlap;
        for (i = 0; i < noverlap; i++) recvproc[iswap-1][i] = overlap[i];
      }

      // compute sendbox for each of my sends
      // obox = intersection of ghostbox with other proc's sub-domain
      // sbox = what I need to send to other proc
      //      = sublo to MIN(sublo+cut,subhi) in idim, for idir = 0
      //      = MAX(subhi-cut,sublo) to subhi in idim, for idir = 1
      //      = obox in other 2 dims
      // if sbox touches other proc's sub-box boundaries in lower dims,
      //   extend sbox in those lower dims to include ghost atoms
      // single mode and multi mode

      double oboxlo[3],oboxhi[3],sbox[6],sbox_multi[6],sbox_multiold[6];

      if (mode == Comm::SINGLE) {
        for (i = 0; i < noverlap; i++) {
          pbc_flag[iswap][i] = 0;
          pbc[iswap][i][0] = pbc[iswap][i][1] = pbc[iswap][i][2] =
            pbc[iswap][i][3] = pbc[iswap][i][4] = pbc[iswap][i][5] = 0;

          (this->*box_other)(idim,idir,overlap[i],oboxlo,oboxhi);

          if (i < noverlap1) {
            sbox[0] = MAX(oboxlo[0],lo1[0]);
            sbox[1] = MAX(oboxlo[1],lo1[1]);
            sbox[2] = MAX(oboxlo[2],lo1[2]);
            sbox[3] = MIN(oboxhi[0],hi1[0]);
            sbox[4] = MIN(oboxhi[1],hi1[1]);
            sbox[5] = MIN(oboxhi[2],hi1[2]);
          } else {
            pbc_flag[iswap][i] = 1;
            if (idir == 0) pbc[iswap][i][idim] = 1;
            else pbc[iswap][i][idim] = -1;
            if (triclinic) {
              if (idim == 1) pbc[iswap][i][5] = pbc[iswap][i][idim];
              if (idim == 2) pbc[iswap][i][4] = pbc[iswap][i][3] = pbc[iswap][i][idim];
            }
            sbox[0] = MAX(oboxlo[0],lo2[0]);
            sbox[1] = MAX(oboxlo[1],lo2[1]);
            sbox[2] = MAX(oboxlo[2],lo2[2]);
            sbox[3] = MIN(oboxhi[0],hi2[0]);
            sbox[4] = MIN(oboxhi[1],hi2[1]);
            sbox[5] = MIN(oboxhi[2],hi2[2]);
          }

          if (idir == 0) {
            sbox[idim] = sublo[idim];
            if (i < noverlap1)
              sbox[3+idim] = MIN(sbox[3+idim]+cutghost[idim],subhi[idim]);
            else
              sbox[3+idim] = MIN(sbox[3+idim]-prd[idim]+cutghost[idim],subhi[idim]);
          } else {
            if (i < noverlap1) sbox[idim] = MAX(sbox[idim]-cutghost[idim],sublo[idim]);
            else sbox[idim] = MAX(sbox[idim]+prd[idim]-cutghost[idim],sublo[idim]);
            sbox[3+idim] = subhi[idim];
          }

          if (idim >= 1) {
            if (sbox[0] == oboxlo[0]) sbox[0] -= cutghost[0];
            if (sbox[3] == oboxhi[0]) sbox[3] += cutghost[0];
          }
          if (idim == 2) {
            if (sbox[1] == oboxlo[1]) sbox[1] -= cutghost[1];
            if (sbox[4] == oboxhi[1]) sbox[4] += cutghost[1];
          }

          memcpy(sendbox[iswap][i],sbox,6*sizeof(double));
        }
      }

      if (mode == Comm::MULTI) {
        for (i = 0; i < noverlap; i++) {
          pbc_flag[iswap][i] = 0;
          pbc[iswap][i][0] = pbc[iswap][i][1] = pbc[iswap][i][2] =
            pbc[iswap][i][3] = pbc[iswap][i][4] = pbc[iswap][i][5] = 0;

          (this->*box_other)(idim,idir,overlap[i],oboxlo,oboxhi);

          if (i < noverlap1) {
            sbox[0] = MAX(oboxlo[0],lo1[0]);
            sbox[1] = MAX(oboxlo[1],lo1[1]);
            sbox[2] = MAX(oboxlo[2],lo1[2]);
            sbox[3] = MIN(oboxhi[0],hi1[0]);
            sbox[4] = MIN(oboxhi[1],hi1[1]);
            sbox[5] = MIN(oboxhi[2],hi1[2]);
          } else {
            pbc_flag[iswap][i] = 1;
            if (idir == 0) pbc[iswap][i][idim] = 1;
            else pbc[iswap][i][idim] = -1;
            if (triclinic) {
              if (idim == 1) pbc[iswap][i][5] = pbc[iswap][i][idim];
              if (idim == 2) pbc[iswap][i][4] = pbc[iswap][i][3] = pbc[iswap][i][idim];
            }
            sbox[0] = MAX(oboxlo[0],lo2[0]);
            sbox[1] = MAX(oboxlo[1],lo2[1]);
            sbox[2] = MAX(oboxlo[2],lo2[2]);
            sbox[3] = MIN(oboxhi[0],hi2[0]);
            sbox[4] = MIN(oboxhi[1],hi2[1]);
            sbox[5] = MIN(oboxhi[2],hi2[2]);
          }

          for (int icollection = 0; icollection < ncollections; icollection++) {
            sbox_multi[0] = sbox[0];
            sbox_multi[1] = sbox[1];
            sbox_multi[2] = sbox[2];
            sbox_multi[3] = sbox[3];
            sbox_multi[4] = sbox[4];
            sbox_multi[5] = sbox[5];
            if (idir == 0) {
              sbox_multi[idim] = sublo[idim];
              if (i < noverlap1)
                sbox_multi[3+idim] =
                  MIN(sbox_multi[3+idim]+cutghostmulti[icollection][idim],subhi[idim]);
              else
                sbox_multi[3+idim] =
                  MIN(sbox_multi[3+idim]-prd[idim]+cutghostmulti[icollection][idim],
                      subhi[idim]);
            } else {
              if (i < noverlap1)
                sbox_multi[idim] =
                  MAX(sbox_multi[idim]-cutghostmulti[icollection][idim],sublo[idim]);
              else
                sbox_multi[idim] =
                  MAX(sbox_multi[idim]+prd[idim]-cutghostmulti[icollection][idim],
                      sublo[idim]);
              sbox_multi[3+idim] = subhi[idim];
            }

            if (idim >= 1) {
              if (sbox_multi[0] == oboxlo[0])
                sbox_multi[0] -= cutghostmulti[icollection][idim];
              if (sbox_multi[3] == oboxhi[0])
                sbox_multi[3] += cutghostmulti[icollection][idim];
            }
            if (idim == 2) {
              if (sbox_multi[1] == oboxlo[1])
                sbox_multi[1] -= cutghostmulti[icollection][idim];
              if (sbox_multi[4] == oboxhi[1])
                sbox_multi[4] += cutghostmulti[icollection][idim];
            }

            memcpy(sendbox_multi[iswap][i][icollection],sbox_multi,6*sizeof(double));
          }
        }
      }

      if (mode == Comm::MULTIOLD) {
        for (i = 0; i < noverlap; i++) {
          pbc_flag[iswap][i] = 0;
          pbc[iswap][i][0] = pbc[iswap][i][1] = pbc[iswap][i][2] =
            pbc[iswap][i][3] = pbc[iswap][i][4] = pbc[iswap][i][5] = 0;

          (this->*box_other)(idim,idir,overlap[i],oboxlo,oboxhi);

          if (i < noverlap1) {
            sbox[0] = MAX(oboxlo[0],lo1[0]);
            sbox[1] = MAX(oboxlo[1],lo1[1]);
            sbox[2] = MAX(oboxlo[2],lo1[2]);
            sbox[3] = MIN(oboxhi[0],hi1[0]);
            sbox[4] = MIN(oboxhi[1],hi1[1]);
            sbox[5] = MIN(oboxhi[2],hi1[2]);
          } else {
            pbc_flag[iswap][i] = 1;
            if (idir == 0) pbc[iswap][i][idim] = 1;
            else pbc[iswap][i][idim] = -1;
            if (triclinic) {
              if (idim == 1) pbc[iswap][i][5] = pbc[iswap][i][idim];
              if (idim == 2) pbc[iswap][i][4] = pbc[iswap][i][3] = pbc[iswap][i][idim];
            }
            sbox[0] = MAX(oboxlo[0],lo2[0]);
            sbox[1] = MAX(oboxlo[1],lo2[1]);
            sbox[2] = MAX(oboxlo[2],lo2[2]);
            sbox[3] = MIN(oboxhi[0],hi2[0]);
            sbox[4] = MIN(oboxhi[1],hi2[1]);
            sbox[5] = MIN(oboxhi[2],hi2[2]);
          }

          for (int itype = 1; itype <= atom->ntypes; itype++) {
            sbox_multiold[0] = sbox[0];
            sbox_multiold[1] = sbox[1];
            sbox_multiold[2] = sbox[2];
            sbox_multiold[3] = sbox[3];
            sbox_multiold[4] = sbox[4];
            sbox_multiold[5] = sbox[5];
            if (idir == 0) {
              sbox_multiold[idim] = sublo[idim];
              if (i < noverlap1)
                sbox_multiold[3+idim] =
                  MIN(sbox_multiold[3+idim]+cutghostmultiold[itype][idim],subhi[idim]);
              else
                sbox_multiold[3+idim] =
                  MIN(sbox_multiold[3+idim]-prd[idim]+cutghostmultiold[itype][idim],
                      subhi[idim]);
            } else {
              if (i < noverlap1)
                sbox_multiold[idim] =
                  MAX(sbox_multiold[idim]-cutghostmultiold[itype][idim],sublo[idim]);
              else
                sbox_multiold[idim] =
                  MAX(sbox_multiold[idim]+prd[idim]-cutghostmultiold[itype][idim],
                      sublo[idim]);
              sbox_multiold[3+idim] = subhi[idim];
            }

            if (idim >= 1) {
              if (sbox_multiold[0] == oboxlo[0])
                sbox_multiold[0] -= cutghostmultiold[itype][idim];
              if (sbox_multiold[3] == oboxhi[0])
                sbox_multiold[3] += cutghostmultiold[itype][idim];
            }
            if (idim == 2) {
              if (sbox_multiold[1] == oboxlo[1])
                sbox_multiold[1] -= cutghostmultiold[itype][idim];
              if (sbox_multiold[4] == oboxhi[1])
                sbox_multiold[4] += cutghostmultiold[itype][idim];
            }

            memcpy(sendbox_multiold[iswap][i][itype],sbox_multiold,6*sizeof(double));
          }
        }
      }

      iswap++;
    }
  }

  // setup exchange communication = subset of forward/reverse comm procs
  // loop over dimensions
  // determine which procs I will exchange with in each dimension
  // subset of procs that touch my proc in forward/reverse comm
  // sets nexchproc & exchproc, resets nexchprocmax

  int proc;

  for (int idim = 0; idim < dimension; idim++) {

    // overlap = list of procs that touch my sub-box in idim
    // proc can appear twice in list if touches in both directions
    // 2nd add-to-list checks to insure each proc appears exactly once

    noverlap = 0;
    iswap = 2*idim;
    n = nsendproc[iswap];
    for (i = 0; i < n; i++) {
      proc = sendproc[iswap][i];
      if (proc == me) continue;
      if ((this->*box_touch)(proc,idim,0)) {
        if (noverlap == maxoverlap) {
          maxoverlap += DELTA_PROCS;
          memory->grow(overlap,maxoverlap,"comm:overlap");
        }
        overlap[noverlap++] = proc;
      }
    }
    noverlap1 = noverlap;
    iswap = 2*idim+1;
    n = nsendproc[iswap];

    for (i = 0; i < n; i++) {
      proc = sendproc[iswap][i];
      if (proc == me) continue;
      if ((this->*box_touch)(proc,idim,1)) {
        for (j = 0; j < noverlap1; j++)
          if (overlap[j] == proc) break;
        if (j < noverlap1) continue;
        if (noverlap == maxoverlap) {
          maxoverlap += DELTA_PROCS;
          memory->grow(overlap,maxoverlap,"comm:overlap");
        }
        overlap[noverlap++] = proc;
      }
    }

    // reallocate exchproc and exchnum if needed based on noverlap

    if (noverlap > nexchprocmax[idim]) {
      while (nexchprocmax[idim] < noverlap) nexchprocmax[idim] += DELTA_PROCS;
      delete [] exchproc[idim];
      exchproc[idim] = new int[nexchprocmax[idim]];
      delete [] exchnum[idim];
      exchnum[idim] = new int[nexchprocmax[idim]];
    }

    nexchproc[idim] = noverlap;
    for (i = 0; i < noverlap; i++) exchproc[idim][i] = overlap[i];
  }

  // reset sendproc/etc to 0 if cut is really 0.0

  if (cutzero) {
    for (i = 0; i < nswap; i++) {
      nsendproc[i] = nrecvproc[i] =
        sendother[i] = recvother[i] = sendself[i] = 0;
    }
  }

  // reallocate MPI Requests as needed

  int nmax = 0;
  for (i = 0; i < nswap; i++) nmax = MAX(nmax,nprocmax[i]);
  for (i = 0; i < dimension; i++) nmax = MAX(nmax,nexchprocmax[i]);
  if (nmax > maxrequest) {
    maxrequest = nmax;
    delete [] requests;
    requests = new MPI_Request[maxrequest];
  }
}

/* ----------------------------------------------------------------------
   forward communication of atom coords every timestep
   other per-atom attributes may also be sent via pack/unpack routines
------------------------------------------------------------------------- */

void CommTiled::forward_comm(int /*dummy*/)
{
  int i,irecv,n,nsend,nrecv;
  AtomVec *avec = atom->avec;
  double **x = atom->x;

  // exchange data with another set of procs in each swap
  // post recvs from all procs except self
  // send data to all procs except self
  // copy data to self if sendself is set
  // wait on all procs except self and unpack received data
  // if comm_x_only set, exchange or copy directly to x, don't unpack

  for (int iswap = 0; iswap < nswap; iswap++) {
    nsend = nsendproc[iswap] - sendself[iswap];
    nrecv = nrecvproc[iswap] - sendself[iswap];

    if (comm_x_only) {
      if (recvother[iswap]) {
        for (i = 0; i < nrecv; i++)
          MPI_Irecv(x[firstrecv[iswap][i]],size_forward_recv[iswap][i],
                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
      }
      if (sendother[iswap]) {
        for (i = 0; i < nsend; i++) {
          n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i],
                              buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
        }
      }
      if (sendself[iswap]) {
        avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
                        x[firstrecv[iswap][nrecv]],pbc_flag[iswap][nsend],
                        pbc[iswap][nsend]);
      }
      if (recvother[iswap]) MPI_Waitall(nrecv,requests,MPI_STATUS_IGNORE);

    } else if (ghost_velocity) {
      if (recvother[iswap]) {
        for (i = 0; i < nrecv; i++)
          MPI_Irecv(&buf_recv[size_forward*forward_recv_offset[iswap][i]],
                    size_forward_recv[iswap][i],
                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
      }
      if (sendother[iswap]) {
        for (i = 0; i < nsend; i++) {
          n = avec->pack_comm_vel(sendnum[iswap][i],sendlist[iswap][i],
                                  buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
        }
      }
      if (sendself[iswap]) {
        avec->pack_comm_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
                            buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
        avec->unpack_comm_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
                              buf_send);
      }
      if (recvother[iswap]) {
        for (i = 0; i < nrecv; i++) {
          MPI_Waitany(nrecv,requests,&irecv,MPI_STATUS_IGNORE);
          avec->unpack_comm_vel(recvnum[iswap][irecv],firstrecv[iswap][irecv],
                                &buf_recv[size_forward*
                                          forward_recv_offset[iswap][irecv]]);
        }
      }

    } else {
      if (recvother[iswap]) {
        for (i = 0; i < nrecv; i++)
          MPI_Irecv(&buf_recv[size_forward*forward_recv_offset[iswap][i]],
                    size_forward_recv[iswap][i],
                    MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
      }
      if (sendother[iswap]) {
        for (i = 0; i < nsend; i++) {
          n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i],
                              buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
        }
      }
      if (sendself[iswap]) {
        avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
                        buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
        avec->unpack_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
                          buf_send);
      }
      if (recvother[iswap]) {
        for (i = 0; i < nrecv; i++) {
          MPI_Waitany(nrecv,requests,&irecv,MPI_STATUS_IGNORE);
          avec->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
                            &buf_recv[size_forward*
                                      forward_recv_offset[iswap][irecv]]);
        }
      }
    }
  }
}

/* ----------------------------------------------------------------------
   reverse communication of forces on atoms every timestep
   other per-atom attributes may also be sent via pack/unpack routines
------------------------------------------------------------------------- */

void CommTiled::reverse_comm()
{
  int i,irecv,n,nsend,nrecv;
  AtomVec *avec = atom->avec;
  double **f = atom->f;

  // exchange data with another set of procs in each swap
  // post recvs from all procs except self
  // send data to all procs except self
  // copy data to self if sendself is set
  // wait on all procs except self and unpack received data
  // if comm_f_only set, exchange or copy directly from f, don't pack

  for (int iswap = nswap-1; iswap >= 0; iswap--) {
    nsend = nsendproc[iswap] - sendself[iswap];
    nrecv = nrecvproc[iswap] - sendself[iswap];

    if (comm_f_only) {
      if (sendother[iswap]) {
        for (i = 0; i < nsend; i++) {
          MPI_Irecv(&buf_recv[size_reverse*reverse_recv_offset[iswap][i]],
                    size_reverse_recv[iswap][i],MPI_DOUBLE,
                    sendproc[iswap][i],0,world,&requests[i]);
        }
      }
      if (recvother[iswap]) {
        for (i = 0; i < nrecv; i++)
          MPI_Send(f[firstrecv[iswap][i]],size_reverse_send[iswap][i],
                   MPI_DOUBLE,recvproc[iswap][i],0,world);
      }
      if (sendself[iswap]) {
        avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
                             f[firstrecv[iswap][nrecv]]);
      }
      if (sendother[iswap]) {
        for (i = 0; i < nsend; i++) {
          MPI_Waitany(nsend,requests,&irecv,MPI_STATUS_IGNORE);
          avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
                               &buf_recv[size_reverse*
                                         reverse_recv_offset[iswap][irecv]]);
        }
      }

    } else {
      if (sendother[iswap]) {
        for (i = 0; i < nsend; i++)
          MPI_Irecv(&buf_recv[size_reverse*reverse_recv_offset[iswap][i]],
                    size_reverse_recv[iswap][i],MPI_DOUBLE,
                    sendproc[iswap][i],0,world,&requests[i]);
      }
      if (recvother[iswap]) {
        for (i = 0; i < nrecv; i++) {
          n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i],
                                 buf_send);
          MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
        }
      }
      if (sendself[iswap]) {
        avec->pack_reverse(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
                           buf_send);
        avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
                             buf_send);
      }
      if (sendother[iswap]) {
        for (i = 0; i < nsend; i++) {
          MPI_Waitany(nsend,requests,&irecv,MPI_STATUS_IGNORE);
          avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
                               &buf_recv[size_reverse*
                                         reverse_recv_offset[iswap][irecv]]);
        }
      }
    }
  }
}

/* ----------------------------------------------------------------------
   exchange: move atoms to correct processors
   atoms exchanged with procs that touch sub-box in each of 3 dims
   send out atoms that have left my box, receive ones entering my box
   atoms will be lost if not inside a touching proc's box
     can happen if atom moves outside of non-periodic boundary
     or if atom moves more than one proc away
   this routine called before every reneighboring
   for triclinic, atoms must be in lamda coords (0-1) before exchange is called
------------------------------------------------------------------------- */

void CommTiled::exchange()
{
  int i,m,nexch,nsend,nrecv,nlocal,proc,offset;
  double lo,hi,value;
  double **x;
  AtomVec *avec = atom->avec;

  // clear global->local map for owned and ghost atoms
  // b/c atoms migrate to new procs in exchange() and
  //   new ghosts are created in borders()
  // map_set() is done at end of borders()
  // clear ghost count and any ghost bonus data internal to AtomVec

  if (map_style != Atom::MAP_NONE) atom->map_clear();
  atom->nghost = 0;
  atom->avec->clear_bonus();

  // insure send buf has extra space for a single atom
  // only need to reset if a fix can dynamically add to size of single atom

  if (maxexchange_fix_dynamic) {
    int bufextra_old = bufextra;
    init_exchange();
    if (bufextra > bufextra_old) grow_send(maxsend+bufextra,2);
  }

  // domain properties used in exchange method and methods it calls
  // subbox bounds for orthogonal or triclinic

  if (triclinic == 0) {
    prd = domain->prd;
    boxlo = domain->boxlo;
    boxhi = domain->boxhi;
    sublo = domain->sublo;
    subhi = domain->subhi;
  } else {
    prd = domain->prd_lamda;
    boxlo = domain->boxlo_lamda;
    boxhi = domain->boxhi_lamda;
    sublo = domain->sublo_lamda;
    subhi = domain->subhi_lamda;
  }

  // loop over dimensions

  dimension = domain->dimension;

  for (int dim = 0; dim < dimension; dim++) {

    // fill buffer with atoms leaving my box, using < and >=
    // when atom is deleted, fill it in with last atom

    x = atom->x;
    lo = sublo[dim];
    hi = subhi[dim];
    nlocal = atom->nlocal;
    i = nsend = 0;

    while (i < nlocal) {
      if (x[i][dim] < lo || x[i][dim] >= hi) {
        if (nsend > maxsend) grow_send(nsend,1);
        proc = (this->*point_drop)(dim,x[i]);
        if (proc != me) {
          buf_send[nsend++] = proc;
          nsend += avec->pack_exchange(i,&buf_send[nsend]);
        } else {
          // DEBUG statement
          // error->warning(FLERR,"Losing atom in CommTiled::exchange() send, "
          //                "likely bad dynamics");
        }
        avec->copy(nlocal-1,i,1);
        nlocal--;
      } else i++;
    }
    atom->nlocal = nlocal;
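
    // the loop above removes a departing atom by overwriting it with the
    // last owned atom; a minimal stand-alone sketch of the same
    // swap-with-last idiom on a hypothetical std::vector (illustration only):
    //
    //   std::size_t k = 0;
    //   while (k < v.size()) {
    //     if (leaving(v[k])) { v[k] = v.back(); v.pop_back(); }
    //     else k++;
    //   }
    //
    // note the index is not advanced after a removal, so the swapped-in
    // atom is also tested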

    // send and recv atoms from neighbor procs that touch my sub-box in dim
    // no send/recv with self
    // send size of message first
    // receiver may receive multiple messages, realloc buf_recv if needed

    nexch = nexchproc[dim];
    if (!nexch) continue;

    for (m = 0; m < nexch; m++)
      MPI_Irecv(&exchnum[dim][m],1,MPI_INT,
                exchproc[dim][m],0,world,&requests[m]);
    for (m = 0; m < nexch; m++)
      MPI_Send(&nsend,1,MPI_INT,exchproc[dim][m],0,world);
    MPI_Waitall(nexch,requests,MPI_STATUS_IGNORE);

    nrecv = 0;
    for (m = 0; m < nexch; m++) nrecv += exchnum[dim][m];
    if (nrecv > maxrecv) grow_recv(nrecv);

    offset = 0;
    for (m = 0; m < nexch; m++) {
      MPI_Irecv(&buf_recv[offset],exchnum[dim][m],
                MPI_DOUBLE,exchproc[dim][m],0,world,&requests[m]);
      offset += exchnum[dim][m];
    }
    for (m = 0; m < nexch; m++)
      MPI_Send(buf_send,nsend,MPI_DOUBLE,exchproc[dim][m],0,world);
    MPI_Waitall(nexch,requests,MPI_STATUS_IGNORE);

    // check incoming atoms to see if I own them and they are in my box
    // if so, add to my list
    // box check is only for this dimension,
    //   atom may be passed to another proc in later dims

    m = 0;
    while (m < nrecv) {
      proc = static_cast<int> (buf_recv[m++]);
      if (proc == me) {
        value = buf_recv[m+dim+1];
        if (value >= lo && value < hi) {
          m += avec->unpack_exchange(&buf_recv[m]);
          continue;
        } else {
          // DEBUG statement
          // error->warning(FLERR,"Losing atom in CommTiled::exchange() recv");
        }
      }
      m += static_cast<int> (buf_recv[m]);
    }
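
    // per-atom layout in buf_recv, as written by the send loop above
    // (hedged sketch; pack_exchange() stores its own total length first):
    //   [ destination proc | n | x,y,z | rest of the atom's data ]
    // so buf_recv[m] after reading proc is the skip length n, and
    // buf_recv[m+dim+1] is the coordinate being tested in this dimension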
  }

  if (atom->firstgroupname) atom->first_reorder();
}

/* ----------------------------------------------------------------------
   borders: list nearby atoms to send to neighboring procs at every timestep
   one list is created per swap/proc that will be made
   as list is made, actually do communication
   this does equivalent of a forward_comm(), so don't need to explicitly
     call forward_comm() on reneighboring timestep
   this routine is called before every reneighboring
   for triclinic, atoms must be in lamda coords (0-1) before borders is called
------------------------------------------------------------------------- */

void CommTiled::borders()
{
  int i,m,n,nlast,nsend,nrecv,ngroup,nprior,ncount,ncountall;
  double xlo,xhi,ylo,yhi,zlo,zhi;
  double *bbox;
  double **x;
  AtomVec *avec = atom->avec;

  // After exchanging, need to reconstruct collection array for border communication
  if (mode == Comm::MULTI) neighbor->build_collection(0);

  // send/recv max one = max # of atoms in single send/recv for any swap
  // send/recv max all = max # of atoms in all sends/recvs within any swap

  smaxone = smaxall = 0;
  rmaxone = rmaxall = 0;

  // loop over swaps in all dimensions

  for (int iswap = 0; iswap < nswap; iswap++) {

    // find atoms within sendboxes using >= and <
    // hi test with "<" is important b/c don't want to send an atom
    //   in lower dim (on boundary) that a proc will recv again in higher dim
    // for x-dim swaps, check owned atoms
    // for yz-dim swaps, check owned and ghost atoms
    // store sent atom indices in sendlist for use in future timesteps
    // single mode and multi mode

    x = atom->x;
    if (iswap % 2 == 0) nlast = atom->nlocal + atom->nghost;

    ncountall = 0;

    for (m = 0; m < nsendproc[iswap]; m++) {

      if (mode == Comm::SINGLE) {
        bbox = sendbox[iswap][m];
        xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2];
        xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5];

        ncount = 0;

        if (!bordergroup) {
          for (i = 0; i < nlast; i++) {
            if (x[i][0] >= xlo && x[i][0] < xhi &&
                x[i][1] >= ylo && x[i][1] < yhi &&
                x[i][2] >= zlo && x[i][2] < zhi) {
              if (ncount == maxsendlist[iswap][m]) grow_list(iswap,m,ncount);
              sendlist[iswap][m][ncount++] = i;
            }
          }
        } else {
          ngroup = atom->nfirst;
          for (i = 0; i < ngroup; i++) {
            if (x[i][0] >= xlo && x[i][0] < xhi &&
                x[i][1] >= ylo && x[i][1] < yhi &&
                x[i][2] >= zlo && x[i][2] < zhi) {
              if (ncount == maxsendlist[iswap][m]) grow_list(iswap,m,ncount);
              sendlist[iswap][m][ncount++] = i;
            }
          }
          for (i = atom->nlocal; i < nlast; i++) {
            if (x[i][0] >= xlo && x[i][0] < xhi &&
                x[i][1] >= ylo && x[i][1] < yhi &&
                x[i][2] >= zlo && x[i][2] < zhi) {
              if (ncount == maxsendlist[iswap][m]) grow_list(iswap,m,ncount);
              sendlist[iswap][m][ncount++] = i;
            }
          }
        }

        sendnum[iswap][m] = ncount;
        smaxone = MAX(smaxone,ncount);
        ncountall += ncount;

      } else if (mode == Comm::MULTI) {

        int *collection = neighbor->collection;
        int icollection;
        ncount = 0;

        if (!bordergroup) {
          for (i = 0; i < nlast; i++) {
            icollection = collection[i];
            bbox = sendbox_multi[iswap][m][icollection];
            xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2];
            xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5];
            if (x[i][0] >= xlo && x[i][0] < xhi &&
                x[i][1] >= ylo && x[i][1] < yhi &&
                x[i][2] >= zlo && x[i][2] < zhi) {
              if (ncount == maxsendlist[iswap][m]) grow_list(iswap,m,ncount);
              sendlist[iswap][m][ncount++] = i;
            }
          }
        } else {
          ngroup = atom->nfirst;
          for (i = 0; i < ngroup; i++) {
            icollection = collection[i];
            bbox = sendbox_multi[iswap][m][icollection];
            xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2];
            xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5];
            if (x[i][0] >= xlo && x[i][0] < xhi &&
                x[i][1] >= ylo && x[i][1] < yhi &&
                x[i][2] >= zlo && x[i][2] < zhi) {
              if (ncount == maxsendlist[iswap][m]) grow_list(iswap,m,ncount);
              sendlist[iswap][m][ncount++] = i;
            }
          }
          for (i = atom->nlocal; i < nlast; i++) {
            icollection = collection[i];
            bbox = sendbox_multi[iswap][m][icollection];
            xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2];
            xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5];
            if (x[i][0] >= xlo && x[i][0] < xhi &&
                x[i][1] >= ylo && x[i][1] < yhi &&
                x[i][2] >= zlo && x[i][2] < zhi) {
              if (ncount == maxsendlist[iswap][m]) grow_list(iswap,m,ncount);
              sendlist[iswap][m][ncount++] = i;
            }
          }
        }

        sendnum[iswap][m] = ncount;
        smaxone = MAX(smaxone,ncount);
        ncountall += ncount;
      } else {

        int *type = atom->type;
        int itype;
        ncount = 0;

        if (!bordergroup) {
          for (i = 0; i < nlast; i++) {
            itype = type[i];
            bbox = sendbox_multiold[iswap][m][itype];
            xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2];
            xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5];
            if (x[i][0] >= xlo && x[i][0] < xhi &&
                x[i][1] >= ylo && x[i][1] < yhi &&
                x[i][2] >= zlo && x[i][2] < zhi) {
              if (ncount == maxsendlist[iswap][m]) grow_list(iswap,m,ncount);
              sendlist[iswap][m][ncount++] = i;
            }
          }
        } else {
          ngroup = atom->nfirst;
          for (i = 0; i < ngroup; i++) {
            itype = type[i];
            bbox = sendbox_multiold[iswap][m][itype];
            xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2];
            xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5];
            if (x[i][0] >= xlo && x[i][0] < xhi &&
                x[i][1] >= ylo && x[i][1] < yhi &&
                x[i][2] >= zlo && x[i][2] < zhi) {
              if (ncount == maxsendlist[iswap][m]) grow_list(iswap,m,ncount);
              sendlist[iswap][m][ncount++] = i;
            }
          }
          for (i = atom->nlocal; i < nlast; i++) {
            itype = type[i];
            bbox = sendbox_multiold[iswap][m][itype];
            xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2];
            xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5];
            if (x[i][0] >= xlo && x[i][0] < xhi &&
                x[i][1] >= ylo && x[i][1] < yhi &&
                x[i][2] >= zlo && x[i][2] < zhi) {
              if (ncount == maxsendlist[iswap][m]) grow_list(iswap,m,ncount);
              sendlist[iswap][m][ncount++] = i;
            }
          }
        }

        sendnum[iswap][m] = ncount;
        smaxone = MAX(smaxone,ncount);
        ncountall += ncount;
      }
    }

    smaxall = MAX(smaxall,ncountall);

    // send sendnum counts to procs who recv from me except self
    // copy data to self if sendself is set

    nsend = nsendproc[iswap] - sendself[iswap];
    nrecv = nrecvproc[iswap] - sendself[iswap];

    if (recvother[iswap])
      for (m = 0; m < nrecv; m++)
        MPI_Irecv(&recvnum[iswap][m],1,MPI_INT,
                  recvproc[iswap][m],0,world,&requests[m]);
    if (sendother[iswap])
      for (m = 0; m < nsend; m++)
        MPI_Send(&sendnum[iswap][m],1,MPI_INT,sendproc[iswap][m],0,world);
    if (sendself[iswap]) recvnum[iswap][nrecv] = sendnum[iswap][nsend];
    if (recvother[iswap]) MPI_Waitall(nrecv,requests,MPI_STATUS_IGNORE);

    // setup other per swap/proc values from sendnum and recvnum

    for (m = 0; m < nsendproc[iswap]; m++) {
      size_reverse_recv[iswap][m] = sendnum[iswap][m]*size_reverse;
      if (m == 0) reverse_recv_offset[iswap][0] = 0;
      else reverse_recv_offset[iswap][m] =
             reverse_recv_offset[iswap][m-1] + sendnum[iswap][m-1];
    }

    ncountall = 0;
    for (m = 0; m < nrecvproc[iswap]; m++) {
      ncount = recvnum[iswap][m];
      rmaxone = MAX(rmaxone,ncount);
      ncountall += ncount;

      size_forward_recv[iswap][m] = ncount*size_forward;
      size_reverse_send[iswap][m] = ncount*size_reverse;
      if (m == 0) {
        firstrecv[iswap][0] = atom->nlocal + atom->nghost;
        forward_recv_offset[iswap][0] = 0;
      } else {
        firstrecv[iswap][m] = firstrecv[iswap][m-1] + recvnum[iswap][m-1];
        forward_recv_offset[iswap][m] =
          forward_recv_offset[iswap][m-1] + recvnum[iswap][m-1];
      }
    }
    rmaxall = MAX(rmaxall,ncountall);
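
    // worked illustration (hedged, hypothetical counts): if this swap
    // receives from two procs with recvnum = {3,5} and nlocal+nghost = 100,
    // then firstrecv = {100,103} and forward_recv_offset = {0,3}, so the
    // 8 incoming ghosts occupy atom indices 100..107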

    // insure send/recv buffers are large enough for this border comm swap

    if (smaxone*size_border > maxsend) grow_send(smaxone*size_border,0);
    if (rmaxall*size_border > maxrecv) grow_recv(rmaxall*size_border);

    // swap atoms with other procs using pack_border(), unpack_border()
    // can use Waitany() because calls to unpack_border()
    //   increment per-atom arrays as much as needed

    if (ghost_velocity) {
      if (recvother[iswap]) {
        for (m = 0; m < nrecv; m++)
          MPI_Irecv(&buf_recv[size_border*forward_recv_offset[iswap][m]],
                    recvnum[iswap][m]*size_border,
                    MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]);
      }
      if (sendother[iswap]) {
        for (m = 0; m < nsend; m++) {
          n = avec->pack_border_vel(sendnum[iswap][m],sendlist[iswap][m],
                                    buf_send,pbc_flag[iswap][m],pbc[iswap][m]);
          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][m],0,world);
        }
      }
      if (sendself[iswap]) {
        avec->pack_border_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
                              buf_send,pbc_flag[iswap][nsend],
                              pbc[iswap][nsend]);
        avec->unpack_border_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
                                buf_send);
      }
      if (recvother[iswap]) {
        for (i = 0; i < nrecv; i++) {
          MPI_Waitany(nrecv,requests,&m,MPI_STATUS_IGNORE);
          avec->unpack_border_vel(recvnum[iswap][m],firstrecv[iswap][m],
                                  &buf_recv[size_border*
                                            forward_recv_offset[iswap][m]]);
        }
      }

    } else {
      if (recvother[iswap]) {
        for (m = 0; m < nrecv; m++)
          MPI_Irecv(&buf_recv[size_border*forward_recv_offset[iswap][m]],
                    recvnum[iswap][m]*size_border,
                    MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]);
      }
      if (sendother[iswap]) {
        for (m = 0; m < nsend; m++) {
          n = avec->pack_border(sendnum[iswap][m],sendlist[iswap][m],
                                buf_send,pbc_flag[iswap][m],pbc[iswap][m]);
          MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][m],0,world);
        }
      }
      if (sendself[iswap]) {
        avec->pack_border(sendnum[iswap][nsend],sendlist[iswap][nsend],
                          buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
        avec->unpack_border(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
                            buf_send);
      }
      if (recvother[iswap]) {
        for (i = 0; i < nrecv; i++) {
          MPI_Waitany(nrecv,requests,&m,MPI_STATUS_IGNORE);
          avec->unpack_border(recvnum[iswap][m],firstrecv[iswap][m],
                              &buf_recv[size_border*
                                        forward_recv_offset[iswap][m]]);
        }
      }
    }

    // increment ghost atoms

    n = nrecvproc[iswap];
    if (n) {
      nprior = atom->nghost + atom->nlocal;
      atom->nghost += forward_recv_offset[iswap][n-1] + recvnum[iswap][n-1];
      if (neighbor->style == Neighbor::MULTI) neighbor->build_collection(nprior);
    }
  }

  // For molecular systems we lose some bits for local atom indices due
  // to encoding of special pairs in neighbor lists. Check for overflows.

  if ((atom->molecular != Atom::ATOMIC)
      && ((atom->nlocal + atom->nghost) > NEIGHMASK))
    error->one(FLERR,"Per-processor number of atoms is too large for "
               "molecular neighbor lists");

  // insure send/recv buffers are long enough for all forward & reverse comm
  // send buf is for one forward or reverse send to one proc
  // recv buf is for all forward or reverse recvs in one swap

  int max = MAX(maxforward*smaxone,maxreverse*rmaxone);
  if (max > maxsend) grow_send(max,0);
  max = MAX(maxforward*rmaxall,maxreverse*smaxall);
  if (max > maxrecv) grow_recv(max);

  // reset global->local map

  if (map_style != Atom::MAP_NONE) atom->map_set();
}

/* ----------------------------------------------------------------------
   forward communication invoked by a Pair
   nsize used only to set recv buffer limit
------------------------------------------------------------------------- */

void CommTiled::forward_comm_pair(Pair *pair)
{
  int i,irecv,n,nsend,nrecv;

  int nsize = pair->comm_forward;

  for (int iswap = 0; iswap < nswap; iswap++) {
    nsend = nsendproc[iswap] - sendself[iswap];
    nrecv = nrecvproc[iswap] - sendself[iswap];

    if (recvother[iswap]) {
      for (i = 0; i < nrecv; i++)
        MPI_Irecv(&buf_recv[nsize*forward_recv_offset[iswap][i]],
                  nsize*recvnum[iswap][i],
                  MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
    }

    if (sendother[iswap]) {
      for (i = 0; i < nsend; i++) {
        n = pair->pack_forward_comm(sendnum[iswap][i],sendlist[iswap][i],
                                    buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
        MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
      }
    }

    if (sendself[iswap]) {
      pair->pack_forward_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
                              buf_send,pbc_flag[iswap][nsend],
                              pbc[iswap][nsend]);
      pair->unpack_forward_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
                                buf_send);
    }
    if (recvother[iswap]) {
      for (i = 0; i < nrecv; i++) {
        MPI_Waitany(nrecv,requests,&irecv,MPI_STATUS_IGNORE);
        pair->unpack_forward_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
                                  &buf_recv[nsize*
                                            forward_recv_offset[iswap][irecv]]);
      }
    }
  }
}

/* ----------------------------------------------------------------------
   reverse communication invoked by a Pair
   nsize used only to set recv buffer limit
------------------------------------------------------------------------- */

reverse_comm_pair(Pair * pair)1426 void CommTiled::reverse_comm_pair(Pair *pair)
1427 {
1428   int i,irecv,n,nsend,nrecv;
1429 
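  // use the larger of comm_reverse and comm_reverse_off so the buffers
  // also fit pair styles that reverse-communicate with newton off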
  int nsize = MAX(pair->comm_reverse,pair->comm_reverse_off);

  for (int iswap = nswap-1; iswap >= 0; iswap--) {
    nsend = nsendproc[iswap] - sendself[iswap];
    nrecv = nrecvproc[iswap] - sendself[iswap];

    if (sendother[iswap]) {
      for (i = 0; i < nsend; i++)
        MPI_Irecv(&buf_recv[nsize*reverse_recv_offset[iswap][i]],
                  nsize*sendnum[iswap][i],MPI_DOUBLE,
                  sendproc[iswap][i],0,world,&requests[i]);
    }
    if (recvother[iswap]) {
      for (i = 0; i < nrecv; i++) {
        n = pair->pack_reverse_comm(recvnum[iswap][i],firstrecv[iswap][i],
                                    buf_send);
        MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
      }
    }
    if (sendself[iswap]) {
      pair->pack_reverse_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
                              buf_send);
      pair->unpack_reverse_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
                                buf_send);
    }
    if (sendother[iswap]) {
      for (i = 0; i < nsend; i++) {
        MPI_Waitany(nsend,requests,&irecv,MPI_STATUS_IGNORE);
        pair->unpack_reverse_comm(sendnum[iswap][irecv],sendlist[iswap][irecv],
                                  &buf_recv[nsize*
                                            reverse_recv_offset[iswap][irecv]]);
      }
    }
  }
}

/* ----------------------------------------------------------------------
   forward communication invoked by a Fix
   size/nsize used only to set recv buffer limit
   size = 0 (default) -> use comm_forward from Fix
   size > 0 -> Fix passes max size per atom
   the latter is only useful if the Fix has several comm modes,
     some smaller than the max stored in its comm_forward
------------------------------------------------------------------------- */

forward_comm_fix(Fix * fix,int size)1475 void CommTiled::forward_comm_fix(Fix *fix, int size)
1476 {
1477   int i,irecv,n,nsize,nsend,nrecv;
1478 
1479   if (size) nsize = size;
1480   else nsize = fix->comm_forward;
1481 
1482   for (int iswap = 0; iswap < nswap; iswap++) {
1483     nsend = nsendproc[iswap] - sendself[iswap];
1484     nrecv = nrecvproc[iswap] - sendself[iswap];
1485 
1486     if (recvother[iswap]) {
1487       for (i = 0; i < nrecv; i++)
1488         MPI_Irecv(&buf_recv[nsize*forward_recv_offset[iswap][i]],
1489                   nsize*recvnum[iswap][i],
1490                   MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
1491     }
1492     if (sendother[iswap]) {
1493       for (i = 0; i < nsend; i++) {
1494         n = fix->pack_forward_comm(sendnum[iswap][i],sendlist[iswap][i],
1495                                    buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
1496         MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
1497       }
1498     }
1499     if (sendself[iswap]) {
1500       fix->pack_forward_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
1501                              buf_send,pbc_flag[iswap][nsend],
1502                              pbc[iswap][nsend]);
1503       fix->unpack_forward_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
1504                                buf_send);
1505     }
1506     if (recvother[iswap]) {
1507       for (i = 0; i < nrecv; i++) {
1508         MPI_Waitany(nrecv,requests,&irecv,MPI_STATUS_IGNORE);
1509         fix->unpack_forward_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
1510                                  &buf_recv[nsize*
1511                                            forward_recv_offset[iswap][irecv]]);
1512       }
1513     }
1514   }
1515 }
1516 
/* ----------------------------------------------------------------------
   reverse communication invoked by a Fix
   size/nsize used only to set recv buffer limit
   size = 0 (default) -> use comm_reverse from Fix
   size > 0 -> Fix passes max size per atom
   the latter is only useful if the Fix has several comm modes,
     some smaller than the max stored in its comm_reverse
------------------------------------------------------------------------- */

reverse_comm_fix(Fix * fix,int size)1526 void CommTiled::reverse_comm_fix(Fix *fix, int size)
1527 {
1528   int i,irecv,n,nsize,nsend,nrecv;
1529 
1530   if (size) nsize = size;
1531   else nsize = fix->comm_reverse;
1532 
1533   for (int iswap = nswap-1; iswap >= 0; iswap--) {
1534     nsend = nsendproc[iswap] - sendself[iswap];
1535     nrecv = nrecvproc[iswap] - sendself[iswap];
1536 
1537     if (sendother[iswap]) {
1538       for (i = 0; i < nsend; i++)
1539         MPI_Irecv(&buf_recv[nsize*reverse_recv_offset[iswap][i]],
1540                   nsize*sendnum[iswap][i],MPI_DOUBLE,
1541                   sendproc[iswap][i],0,world,&requests[i]);
1542     }
1543     if (recvother[iswap]) {
1544       for (i = 0; i < nrecv; i++) {
1545         n = fix->pack_reverse_comm(recvnum[iswap][i],firstrecv[iswap][i],
1546                                    buf_send);
1547         MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
1548       }
1549     }
1550     if (sendself[iswap]) {
1551       fix->pack_reverse_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
1552                              buf_send);
1553       fix->unpack_reverse_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
1554                                buf_send);
1555     }
1556     if (sendother[iswap]) {
1557       for (i = 0; i < nsend; i++) {
1558         MPI_Waitany(nsend,requests,&irecv,MPI_STATUS_IGNORE);
1559         fix->unpack_reverse_comm(sendnum[iswap][irecv],sendlist[iswap][irecv],
1560                                  &buf_recv[nsize*
1561                                            reverse_recv_offset[iswap][irecv]]);
1562       }
1563     }
1564   }
1565 }
1566 
/* ----------------------------------------------------------------------
   reverse communication invoked by a Fix with variable size data
   query fix for all pack sizes to ensure buf_send is big enough
   handshake sizes before irregular comm to ensure buf_recv is big enough
   NOTE: how to setup one big buf recv with correct offsets ??
------------------------------------------------------------------------- */

void CommTiled::reverse_comm_fix_variable(Fix * /*fix*/)
{
  error->all(FLERR,"Reverse comm fix variable not yet supported by CommTiled");
}

/* ----------------------------------------------------------------------
   forward communication invoked by a Compute
   nsize used only to set recv buffer limit
------------------------------------------------------------------------- */

forward_comm_compute(Compute * compute)1584 void CommTiled::forward_comm_compute(Compute *compute)
1585 {
1586   int i,irecv,n,nsend,nrecv;
1587 
1588   int nsize = compute->comm_forward;
1589 
1590   for (int iswap = 0; iswap < nswap; iswap++) {
1591     nsend = nsendproc[iswap] - sendself[iswap];
1592     nrecv = nrecvproc[iswap] - sendself[iswap];
1593 
1594     if (recvother[iswap]) {
1595       for (i = 0; i < nrecv; i++)
1596         MPI_Irecv(&buf_recv[nsize*forward_recv_offset[iswap][i]],
1597                   nsize*recvnum[iswap][i],
1598                   MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
1599     }
1600     if (sendother[iswap]) {
1601       for (i = 0; i < nsend; i++) {
1602         n = compute->pack_forward_comm(sendnum[iswap][i],sendlist[iswap][i],
1603                                        buf_send,pbc_flag[iswap][i],
1604                                        pbc[iswap][i]);
1605         MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
1606       }
1607     }
1608     if (sendself[iswap]) {
1609       compute->pack_forward_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
1610                                  buf_send,pbc_flag[iswap][nsend],
1611                                  pbc[iswap][nsend]);
1612       compute->unpack_forward_comm(recvnum[iswap][nrecv],
1613                                    firstrecv[iswap][nrecv],buf_send);
1614     }
1615     if (recvother[iswap]) {
1616       for (i = 0; i < nrecv; i++) {
1617         MPI_Waitany(nrecv,requests,&irecv,MPI_STATUS_IGNORE);
1618         compute->
1619           unpack_forward_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
1620                               &buf_recv[nsize*
1621                                         forward_recv_offset[iswap][irecv]]);
1622       }
1623     }
1624   }
1625 }

/* ----------------------------------------------------------------------
   reverse communication invoked by a Compute
   nsize used only to set recv buffer limit
------------------------------------------------------------------------- */

reverse_comm_compute(Compute * compute)1632 void CommTiled::reverse_comm_compute(Compute *compute)
1633 {
1634   int i,irecv,n,nsend,nrecv;
1635 
1636   int nsize = compute->comm_reverse;
1637 
1638   for (int iswap = nswap-1; iswap >= 0; iswap--) {
1639     nsend = nsendproc[iswap] - sendself[iswap];
1640     nrecv = nrecvproc[iswap] - sendself[iswap];
1641 
1642     if (sendother[iswap]) {
1643       for (i = 0; i < nsend; i++)
1644         MPI_Irecv(&buf_recv[nsize*reverse_recv_offset[iswap][i]],
1645                   nsize*sendnum[iswap][i],MPI_DOUBLE,
1646                   sendproc[iswap][i],0,world,&requests[i]);
1647     }
1648     if (recvother[iswap]) {
1649       for (i = 0; i < nrecv; i++) {
1650         n = compute->pack_reverse_comm(recvnum[iswap][i],firstrecv[iswap][i],
1651                                        buf_send);
1652         MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
1653       }
1654     }
1655     if (sendself[iswap]) {
1656       compute->pack_reverse_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
1657                                  buf_send);
1658       compute->unpack_reverse_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
1659                                    buf_send);
1660     }
1661     if (sendother[iswap]) {
1662       for (i = 0; i < nsend; i++) {
1663         MPI_Waitany(nsend,requests,&irecv,MPI_STATUS_IGNORE);
1664         compute->
1665           unpack_reverse_comm(sendnum[iswap][irecv],sendlist[iswap][irecv],
1666                               &buf_recv[nsize*
1667                                         reverse_recv_offset[iswap][irecv]]);
1668       }
1669     }
1670   }
1671 }

/* ----------------------------------------------------------------------
   forward communication invoked by a Dump
   nsize used only to set recv buffer limit
------------------------------------------------------------------------- */

forward_comm_dump(Dump * dump)1678 void CommTiled::forward_comm_dump(Dump *dump)
1679 {
1680   int i,irecv,n,nsend,nrecv;
1681 
1682   int nsize = dump->comm_forward;
1683 
1684   for (int iswap = 0; iswap < nswap; iswap++) {
1685     nsend = nsendproc[iswap] - sendself[iswap];
1686     nrecv = nrecvproc[iswap] - sendself[iswap];
1687 
1688     if (recvother[iswap]) {
1689       for (i = 0; i < nrecv; i++)
1690         MPI_Irecv(&buf_recv[nsize*forward_recv_offset[iswap][i]],
1691                   nsize*recvnum[iswap][i],
1692                   MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
1693     }
1694     if (sendother[iswap]) {
1695       for (i = 0; i < nsend; i++) {
1696         n = dump->pack_forward_comm(sendnum[iswap][i],sendlist[iswap][i],
1697                                     buf_send,pbc_flag[iswap][i],
1698                                     pbc[iswap][i]);
1699         MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
1700       }
1701     }
1702     if (sendself[iswap]) {
1703       dump->pack_forward_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
1704                               buf_send,pbc_flag[iswap][nsend],
1705                               pbc[iswap][nsend]);
1706       dump->unpack_forward_comm(recvnum[iswap][nrecv],
1707                                 firstrecv[iswap][nrecv],buf_send);
1708     }
1709     if (recvother[iswap]) {
1710       for (i = 0; i < nrecv; i++) {
1711         MPI_Waitany(nrecv,requests,&irecv,MPI_STATUS_IGNORE);
1712         dump->unpack_forward_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
1713                                   &buf_recv[nsize*
1714                                             forward_recv_offset[iswap][irecv]]);
1715       }
1716     }
1717   }
1718 }

/* ----------------------------------------------------------------------
   reverse communication invoked by a Dump
   nsize used only to set recv buffer limit
------------------------------------------------------------------------- */

reverse_comm_dump(Dump * dump)1725 void CommTiled::reverse_comm_dump(Dump *dump)
1726 {
1727   int i,irecv,n,nsend,nrecv;
1728 
1729   int nsize = dump->comm_reverse;
1730 
1731   for (int iswap = nswap-1; iswap >= 0; iswap--) {
1732     nsend = nsendproc[iswap] - sendself[iswap];
1733     nrecv = nrecvproc[iswap] - sendself[iswap];
1734 
1735     if (sendother[iswap]) {
1736       for (i = 0; i < nsend; i++)
1737         MPI_Irecv(&buf_recv[nsize*reverse_recv_offset[iswap][i]],
1738                   nsize*sendnum[iswap][i],MPI_DOUBLE,
1739                   sendproc[iswap][i],0,world,&requests[i]);
1740     }
1741     if (recvother[iswap]) {
1742       for (i = 0; i < nrecv; i++) {
1743         n = dump->pack_reverse_comm(recvnum[iswap][i],firstrecv[iswap][i],
1744                                     buf_send);
1745         MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
1746       }
1747     }
1748     if (sendself[iswap]) {
1749       dump->pack_reverse_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
1750                               buf_send);
1751       dump->unpack_reverse_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
1752                                 buf_send);
1753     }
1754     if (sendother[iswap]) {
1755       for (i = 0; i < nsend; i++) {
1756         MPI_Waitany(nsend,requests,&irecv,MPI_STATUS_IGNORE);
1757         dump->unpack_reverse_comm(sendnum[iswap][irecv],sendlist[iswap][irecv],
1758                                   &buf_recv[nsize*
1759                                             reverse_recv_offset[iswap][irecv]]);
1760       }
1761     }
1762   }
1763 }

/* ----------------------------------------------------------------------
   forward communication of Nsize values in per-atom array
------------------------------------------------------------------------- */

void CommTiled::forward_comm_array(int nsize, double **array)
{
  int i,j,k,m,iatom,last,irecv,nsend,nrecv;

  // ensure send/recv bufs are big enough for nsize
  // based on smaxone/rmaxall from most recent borders() invocation

  if (nsize > maxforward) {
    maxforward = nsize;
    if (maxforward*smaxone > maxsend) grow_send(maxforward*smaxone,0);
    if (maxforward*rmaxall > maxrecv) grow_recv(maxforward*rmaxall);
  }

  for (int iswap = 0; iswap < nswap; iswap++) {
    nsend = nsendproc[iswap] - sendself[iswap];
    nrecv = nrecvproc[iswap] - sendself[iswap];

    MPI_Barrier(world);

    if (recvother[iswap]) {
      for (i = 0; i < nrecv; i++)
        MPI_Irecv(&buf_recv[nsize*forward_recv_offset[iswap][i]],
                  nsize*recvnum[iswap][i],
                  MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
    }
    if (sendother[iswap]) {
      for (i = 0; i < nsend; i++) {
        m = 0;
        for (iatom = 0; iatom < sendnum[iswap][i]; iatom++) {
          j = sendlist[iswap][i][iatom];
          for (k = 0; k < nsize; k++)
            buf_send[m++] = array[j][k];
        }
        MPI_Send(buf_send,nsize*sendnum[iswap][i],
                 MPI_DOUBLE,sendproc[iswap][i],0,world);
      }
    }
    if (sendself[iswap]) {
      m = 0;
      for (iatom = 0; iatom < sendnum[iswap][nsend]; iatom++) {
        j = sendlist[iswap][nsend][iatom];
        for (k = 0; k < nsize; k++)
          buf_send[m++] = array[j][k];
      }
      m = 0;
      last = firstrecv[iswap][nrecv] + recvnum[iswap][nrecv];
      for (iatom = firstrecv[iswap][nrecv]; iatom < last; iatom++)
        for (k = 0; k < nsize; k++)
          array[iatom][k] = buf_send[m++];
    }

    if (recvother[iswap]) {
      for (i = 0; i < nrecv; i++) {
        MPI_Waitany(nrecv,requests,&irecv,MPI_STATUS_IGNORE);
        m = nsize*forward_recv_offset[iswap][irecv];
        last = firstrecv[iswap][irecv] + recvnum[iswap][irecv];
        for (iatom = firstrecv[iswap][irecv]; iatom < last; iatom++)
          for (k = 0; k < nsize; k++)
            array[iatom][k] = buf_recv[m++];
      }
    }
  }
}

/* ----------------------------------------------------------------------
   exchange info provided with all 6 stencil neighbors
   NOTE: this method is currently not used
------------------------------------------------------------------------- */

int CommTiled::exchange_variable(int n, double * /*inbuf*/, double *& /*outbuf*/)
{
  int nrecv = n;
  return nrecv;
}

/* ----------------------------------------------------------------------
   determine overlap list of Noverlap procs the lo/hi box overlaps
   overlap = non-zero area in common between box and proc sub-domain
   box is owned by me and extends in dim
------------------------------------------------------------------------- */

box_drop_brick(int idim,double * lo,double * hi,int & indexme)1850 void CommTiled::box_drop_brick(int idim, double *lo, double *hi, int &indexme)
1851 {
1852   int dir;
1853   int index = -1;
1854 
1855   if (hi[idim] == sublo[idim]) {
1856     index = myloc[idim] - 1;
1857     dir = -1;
1858   } else if (lo[idim] == subhi[idim]) {
1859     index = myloc[idim] + 1;
1860     dir = 1;
1861   } else if (hi[idim] == boxhi[idim]) {
1862     index = procgrid[idim] - 1;
1863     dir = -1;
1864   } else if (lo[idim] == boxlo[idim]) {
1865     index = 0;
1866     dir = 1;
1867   } else error->one(FLERR,"Comm tiled mis-match in box drop brick");
1868 
1869   int other1,other2,proc;
1870   double lower,upper;
1871   double *split;
1872 
1873   if (idim == 0) {
1874     other1 = myloc[1]; other2 = myloc[2];
1875     split = xsplit;
1876   } else if (idim == 1) {
1877     other1 = myloc[0]; other2 = myloc[2];
1878     split = ysplit;
1879   } else {
1880     other1 = myloc[0]; other2 = myloc[1];
1881     split = zsplit;
1882   }
1883 
1884   if (index < 0 || index > procgrid[idim])
1885     error->one(FLERR,"Comm tiled invalid index in box drop brick");
1886 
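  // walk the proc grid in direction dir starting at index, adding each
  // proc whose slab of the split overlaps the box, until the box is
  // exhausted or the edge of the proc grid is reached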
  while (1) {
    lower = boxlo[idim] + prd[idim]*split[index];
    if (index < procgrid[idim]-1)
      upper = boxlo[idim] + prd[idim]*split[index+1];
    else upper = boxhi[idim];
    if (lower >= hi[idim] || upper <= lo[idim]) break;

    if (idim == 0) proc = grid2proc[index][other1][other2];
    else if (idim == 1) proc = grid2proc[other1][index][other2];
    else proc = grid2proc[other1][other2][index];

    if (noverlap == maxoverlap) {
      maxoverlap += DELTA_PROCS;
      memory->grow(overlap,maxoverlap,"comm:overlap");
    }

    if (proc == me) indexme = noverlap;
    overlap[noverlap++] = proc;
    index += dir;
    if (index < 0 || index >= procgrid[idim]) break;
  }
}

/* ----------------------------------------------------------------------
   determine overlap list of Noverlap procs the lo/hi box overlaps
   overlap = non-zero area in common between box and proc sub-domain
   recursive method for traversing an RCB tree of cuts
   no need to split lo/hi box as recurse b/c OK if box extends outside RCB box
------------------------------------------------------------------------- */

void CommTiled::box_drop_tiled(int /*idim*/, double *lo, double *hi, int &indexme)
{
  box_drop_tiled_recurse(lo,hi,0,nprocs-1,indexme);
}

void CommTiled::box_drop_tiled_recurse(double *lo, double *hi,
                                       int proclower, int procupper,
                                       int &indexme)
{
  // end recursion when partition is a single proc
  // add proc to overlap list

  if (proclower == procupper) {
    if (noverlap == maxoverlap) {
      maxoverlap += DELTA_PROCS;
      memory->grow(overlap,maxoverlap,"comm:overlap");
    }

    if (proclower == me) indexme = noverlap;
    overlap[noverlap++] = proclower;
    return;
  }

  // drop box on each side of cut it extends beyond
  // use > and < criteria so does not include a box it only touches
  // procmid = 1st processor in upper half of partition
  //         = location in tree that stores this cut
  // dim = 0,1,2 dimension of cut
  // cut = position of cut

  int procmid = proclower + (procupper - proclower) / 2 + 1;
  int idim = rcbinfo[procmid].dim;
  double cut = boxlo[idim] + prd[idim]*rcbinfo[procmid].cutfrac;

  if (lo[idim] < cut)
    box_drop_tiled_recurse(lo,hi,proclower,procmid-1,indexme);
  if (hi[idim] > cut)
    box_drop_tiled_recurse(lo,hi,procmid,procupper,indexme);
}

/* ----------------------------------------------------------------------
   return other box owned by proc as lo/hi corner pts
------------------------------------------------------------------------- */

box_other_brick(int idim,int idir,int proc,double * lo,double * hi)1961 void CommTiled::box_other_brick(int idim, int idir,
1962                                 int proc, double *lo, double *hi)
1963 {
1964   lo[0] = sublo[0]; lo[1] = sublo[1]; lo[2] = sublo[2];
1965   hi[0] = subhi[0]; hi[1] = subhi[1]; hi[2] = subhi[2];
1966 
1967   int other1,other2,oproc;
1968   double *split;
1969 
1970   if (idim == 0) {
1971     other1 = myloc[1]; other2 = myloc[2];
1972     split = xsplit;
1973   } else if (idim == 1) {
1974     other1 = myloc[0]; other2 = myloc[2];
1975     split = ysplit;
1976   } else {
1977     other1 = myloc[0]; other2 = myloc[1];
1978     split = zsplit;
1979   }
1980 
1981   int dir = -1;
1982   if (idir) dir = 1;
1983   int index = myloc[idim];
1984   int n = procgrid[idim];
1985 
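  // walk the periodic ring of procs in dim idim, starting next to me in
  // direction dir, until proc is found, then reset lo/hi in that dim from
  // its split fractions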
  for (int i = 0; i < n; i++) {
    index += dir;
    if (index < 0) index = n-1;
    else if (index >= n) index = 0;

    if (idim == 0) oproc = grid2proc[index][other1][other2];
    else if (idim == 1) oproc = grid2proc[other1][index][other2];
    else oproc = grid2proc[other1][other2][index];

    if (proc == oproc) {
      lo[idim] = boxlo[idim] + prd[idim]*split[index];
      if (split[index+1] < 1.0)
        hi[idim] = boxlo[idim] + prd[idim]*split[index+1];
      else hi[idim] = boxhi[idim];
      return;
    }
  }
}

/* ----------------------------------------------------------------------
   return other box owned by proc as lo/hi corner pts
------------------------------------------------------------------------- */

void CommTiled::box_other_tiled(int /*idim*/, int /*idir*/,
                                int proc, double *lo, double *hi)
{
  double (*split)[2] = rcbinfo[proc].mysplit;

  lo[0] = boxlo[0] + prd[0]*split[0][0];
  if (split[0][1] < 1.0) hi[0] = boxlo[0] + prd[0]*split[0][1];
  else hi[0] = boxhi[0];

  lo[1] = boxlo[1] + prd[1]*split[1][0];
  if (split[1][1] < 1.0) hi[1] = boxlo[1] + prd[1]*split[1][1];
  else hi[1] = boxhi[1];

  lo[2] = boxlo[2] + prd[2]*split[2][0];
  if (split[2][1] < 1.0) hi[2] = boxlo[2] + prd[2]*split[2][1];
  else hi[2] = boxhi[2];
}

/* ----------------------------------------------------------------------
   return 1 if proc's box touches me, else 0
   procneigh stores 6 procs that touch me
------------------------------------------------------------------------- */

int CommTiled::box_touch_brick(int proc, int idim, int idir)
{
  if (procneigh[idim][idir] == proc) return 1;
  return 0;
}

/* ----------------------------------------------------------------------
   return 1 if proc's box touches me, else 0
------------------------------------------------------------------------- */

int CommTiled::box_touch_tiled(int proc, int idim, int idir)
{
  // sending to left
  // only touches if proc hi = my lo, or if proc hi = boxhi and my lo = boxlo

  if (idir == 0) {
    if (rcbinfo[proc].mysplit[idim][1] == rcbinfo[me].mysplit[idim][0])
      return 1;
    else if (rcbinfo[proc].mysplit[idim][1] == 1.0 &&
             rcbinfo[me].mysplit[idim][0] == 0.0)
      return 1;

  // sending to right
  // only touches if proc lo = my hi, or if proc lo = boxlo and my hi = boxhi

  } else {
    if (rcbinfo[proc].mysplit[idim][0] == rcbinfo[me].mysplit[idim][1])
      return 1;
    else if (rcbinfo[proc].mysplit[idim][0] == 0.0 &&
             rcbinfo[me].mysplit[idim][1] == 1.0)
      return 1;
  }

  return 0;
}

/* ----------------------------------------------------------------------
   determine which proc owns point x in dim idim for a brick decomposition
   return the procneigh on whichever side of my sub-box is closer to x
------------------------------------------------------------------------- */

int CommTiled::point_drop_brick(int idim, double *x)
{
  if (closer_subbox_edge(idim,x)) return procneigh[idim][1];
  return procneigh[idim][0];
}

/* ----------------------------------------------------------------------
   determine which proc owns point x via recursion thru RCB tree
------------------------------------------------------------------------- */

point_drop_tiled(int idim,double * x)2081 int CommTiled::point_drop_tiled(int idim, double *x)
2082 {
2083   double xnew[3];
2084   xnew[0] = x[0]; xnew[1] = x[1]; xnew[2] = x[2];
2085 
2086   if (idim == 0) {
2087     if (xnew[1] < sublo[1] || xnew[1] > subhi[1]) {
2088       if (closer_subbox_edge(1,x)) xnew[1] = subhi[1];
2089       else xnew[1] = sublo[1];
2090     }
2091   }
2092   if (idim <= 1) {
2093     if (xnew[2] < sublo[2] || xnew[2] > subhi[2]) {
2094       if (closer_subbox_edge(2,x)) xnew[2] = subhi[2];
2095       else xnew[2] = sublo[2];
2096     }
2097   }
2098 
2099   int proc = point_drop_tiled_recurse(xnew,0,nprocs-1);
2100   if (proc == me) return me;
2101 
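  // the point landed on another proc; if that proc's lower split in dim 1
  // or 2 coincides with my upper split, the point lies exactly on a cut
  // plane, so nudge it inward by EPSILON of my sub-box width and drop it
  // again (twice at most) to move it off the ambiguous boundary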
  if (idim == 0) {
    int done = 1;
    if (rcbinfo[proc].mysplit[1][0] == rcbinfo[me].mysplit[1][1]) {
      xnew[1] -= EPSILON * (subhi[1]-sublo[1]);
      done = 0;
    }
    if (rcbinfo[proc].mysplit[2][0] == rcbinfo[me].mysplit[2][1]) {
      xnew[2] -= EPSILON * (subhi[2]-sublo[2]);
      done = 0;
    }
    if (!done) {
      proc = point_drop_tiled_recurse(xnew,0,nprocs-1);
      done = 1;
      if (rcbinfo[proc].mysplit[1][0] == rcbinfo[me].mysplit[1][1]) {
        xnew[1] -= EPSILON * (subhi[1]-sublo[1]);
        done = 0;
      }
      if (rcbinfo[proc].mysplit[2][0] == rcbinfo[me].mysplit[2][1]) {
        xnew[2] -= EPSILON * (subhi[2]-sublo[2]);
        done = 0;
      }
      if (!done) proc = point_drop_tiled_recurse(xnew,0,nprocs-1);
    }
  } else if (idim == 1) {
    if (rcbinfo[proc].mysplit[2][0] == rcbinfo[me].mysplit[2][1]) {
      xnew[2] -= EPSILON * (subhi[2]-sublo[2]);
      proc = point_drop_tiled_recurse(xnew,0,nprocs-1);
    }
  }

  return proc;
}

/* ----------------------------------------------------------------------
   recursive point drop thru RCB tree
------------------------------------------------------------------------- */

int CommTiled::point_drop_tiled_recurse(double *x,
                                        int proclower, int procupper)
{
  // end recursion when partition is a single proc
  // return proc

  if (proclower == procupper) return proclower;

  // drop point on side of cut it is on
  // use < criterion so point is not on high edge of proc sub-domain
  // procmid = 1st processor in upper half of partition
  //         = location in tree that stores this cut
  // dim = 0,1,2 dimension of cut
  // cut = position of cut

  int procmid = proclower + (procupper - proclower) / 2 + 1;
  int idim = rcbinfo[procmid].dim;
  double cut = boxlo[idim] + prd[idim]*rcbinfo[procmid].cutfrac;

  if (x[idim] < cut) return point_drop_tiled_recurse(x,proclower,procmid-1);
  else return point_drop_tiled_recurse(x,procmid,procupper);
}

/* ----------------------------------------------------------------------
   assumes x[idim] is outside my subbox bounds in dim idim
   return 0 if x is closer to the lo edge of the subbox, 1 if the hi edge
------------------------------------------------------------------------- */

int CommTiled::closer_subbox_edge(int idim, double *x)
{
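  // when a subbox edge coincides with the periodic box boundary, measure
  // the distance to the image of x wrapped through that boundary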
  double deltalo,deltahi;

  if (sublo[idim] == boxlo[idim])
    deltalo = fabs(x[idim]-prd[idim] - sublo[idim]);
  else deltalo = fabs(x[idim] - sublo[idim]);

  if (subhi[idim] == boxhi[idim])
    deltahi = fabs(x[idim]+prd[idim] - subhi[idim]);
  else deltahi = fabs(x[idim] - subhi[idim]);

  if (deltalo < deltahi) return 0;
  return 1;
}

/* ----------------------------------------------------------------------
   if RCB decomp exists and just changed, gather needed global RCB info
------------------------------------------------------------------------- */

void CommTiled::coord2proc_setup()
{
  if (!rcbnew) return;

  if (!rcbinfo)
    rcbinfo = (RCBinfo *)
      memory->smalloc(nprocs*sizeof(RCBinfo),"comm:rcbinfo");
  rcbnew = 0;
  RCBinfo rcbone;
  memcpy(&rcbone.mysplit[0][0],&mysplit[0][0],6*sizeof(double));
  rcbone.cutfrac = rcbcutfrac;
  rcbone.dim = rcbcutdim;
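  // gather every proc's split/cut info; RCBinfo is exchanged as raw bytes
  // (MPI_CHAR), which is safe since all ranks share the same struct layout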
  MPI_Allgather(&rcbone,sizeof(RCBinfo),MPI_CHAR,
                rcbinfo,sizeof(RCBinfo),MPI_CHAR,world);
}

/* ----------------------------------------------------------------------
   determine which proc owns atom with coord x[3] based on current decomp
   x will be in box (orthogonal) or lamda coords (triclinic)
   if layout = UNIFORM or NONUNIFORM, invoke parent method
   if layout = TILED, use point_drop_tiled_recurse()
   return owning proc ID, ignore igx,igy,igz
------------------------------------------------------------------------- */

int CommTiled::coord2proc(double *x, int &igx, int &igy, int &igz)
{
  if (layout != Comm::LAYOUT_TILED) return Comm::coord2proc(x,igx,igy,igz);
  return point_drop_tiled_recurse(x,0,nprocs-1);
}

/* ----------------------------------------------------------------------
   realloc the size of the send buffer as needed with BUFFACTOR and bufextra
   flag = 0, don't need to realloc with copy, just free/malloc w/ BUFFACTOR
   flag = 1, realloc with BUFFACTOR
   flag = 2, free/malloc w/out BUFFACTOR
------------------------------------------------------------------------- */

void CommTiled::grow_send(int n, int flag)
{
  if (flag == 0) {
    maxsend = static_cast<int> (BUFFACTOR * n);
    memory->destroy(buf_send);
    memory->create(buf_send,maxsend+bufextra,"comm:buf_send");
  } else if (flag == 1) {
    maxsend = static_cast<int> (BUFFACTOR * n);
    memory->grow(buf_send,maxsend+bufextra,"comm:buf_send");
  } else {
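    // flag == 2: destroy() nulls the pointer, so grow() allocates a fresh
    // buffer at the current maxsend, without applying BUFFACTOR to n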
    memory->destroy(buf_send);
    memory->grow(buf_send,maxsend+bufextra,"comm:buf_send");
  }
}

/* ----------------------------------------------------------------------
   free/malloc the size of the recv buffer as needed with BUFFACTOR
------------------------------------------------------------------------- */

void CommTiled::grow_recv(int n)
{
  maxrecv = static_cast<int> (BUFFACTOR * n);
  memory->destroy(buf_recv);
  memory->create(buf_recv,maxrecv,"comm:buf_recv");
}

/* ----------------------------------------------------------------------
   realloc the size of the iswap sendlist as needed with BUFFACTOR
------------------------------------------------------------------------- */

void CommTiled::grow_list(int iswap, int iwhich, int n)
{
  maxsendlist[iswap][iwhich] = static_cast<int> (BUFFACTOR * n);
  memory->grow(sendlist[iswap][iwhich],maxsendlist[iswap][iwhich],
               "comm:sendlist[i][j]");
}

/* ----------------------------------------------------------------------
   allocation of swap info
------------------------------------------------------------------------- */

void CommTiled::allocate_swap(int n)
{
  nsendproc = new int[n];
  nrecvproc = new int[n];
  sendother = new int[n];
  recvother = new int[n];
  sendself = new int[n];
  nprocmax = new int[n];

  sendproc = new int*[n];
  recvproc = new int*[n];
  sendnum = new int*[n];
  recvnum = new int*[n];
  size_forward_recv = new int*[n];
  firstrecv = new int*[n];
  size_reverse_send = new int*[n];
  size_reverse_recv = new int*[n];
  forward_recv_offset = new int*[n];
  reverse_recv_offset = new int*[n];

  pbc_flag = new int*[n];
  pbc = new int**[n];
  sendbox = new double**[n];
  sendbox_multi = new double***[n];
  sendbox_multiold = new double***[n];
  maxsendlist = new int*[n];
  sendlist = new int**[n];

  for (int i = 0; i < n; i++) {
    sendproc[i] = recvproc[i] = nullptr;
    sendnum[i] = recvnum[i] = nullptr;
    size_forward_recv[i] = firstrecv[i] = nullptr;
    size_reverse_send[i] = size_reverse_recv[i] = nullptr;
    forward_recv_offset[i] = reverse_recv_offset[i] = nullptr;

    pbc_flag[i] = nullptr;
    pbc[i] = nullptr;
    sendbox[i] = nullptr;
    sendbox_multi[i] = nullptr;
    sendbox_multiold[i] = nullptr;
    maxsendlist[i] = nullptr;
    sendlist[i] = nullptr;
  }

  maxrequest = 0;
  requests = nullptr;

  for (int i = 0; i < n; i++) {
    nprocmax[i] = DELTA_PROCS;
    grow_swap_send(i,DELTA_PROCS,0);
    grow_swap_recv(i,DELTA_PROCS);
  }

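  // exchange info is stored per dimension; n = 2 swaps per dimension,
  // hence n/2 entries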
  nexchproc = new int[n/2];
  nexchprocmax = new int[n/2];
  exchproc = new int*[n/2];
  exchnum = new int*[n/2];

  for (int i = 0; i < n/2; i++) {
    nexchprocmax[i] = DELTA_PROCS;
    exchproc[i] = new int[DELTA_PROCS];
    exchnum[i] = new int[DELTA_PROCS];
  }
}

/* ----------------------------------------------------------------------
   grow info for swap I, to allow for N procs to communicate with
   ditto for complementary recv for swap I+1 or I-1, as invoked by caller
------------------------------------------------------------------------- */

grow_swap_send(int i,int n,int nold)2334 void CommTiled::grow_swap_send(int i, int n, int nold)
2335 {
2336   delete [] sendproc[i];
2337   sendproc[i] = new int[n];
2338   delete [] sendnum[i];
2339   sendnum[i] = new int[n];
2340 
2341   delete [] size_reverse_recv[i];
2342   size_reverse_recv[i] = new int[n];
2343   delete [] reverse_recv_offset[i];
2344   reverse_recv_offset[i] = new int[n];
2345 
2346   delete [] pbc_flag[i];
2347   pbc_flag[i] = new int[n];
2348   memory->destroy(pbc[i]);
2349   memory->create(pbc[i],n,6,"comm:pbc_flag");
2350   memory->destroy(sendbox[i]);
2351   memory->create(sendbox[i],n,6,"comm:sendbox");
2352   grow_swap_send_multi(i,n);
2353   memory->destroy(sendbox_multiold[i]);
2354   memory->create(sendbox_multiold[i],n,atom->ntypes+1,6,"comm:sendbox_multiold");
2355 
2356   delete [] maxsendlist[i];
2357   maxsendlist[i] = new int[n];
2358 
2359   for (int j = 0; j < nold; j++) memory->destroy(sendlist[i][j]);
2360   delete [] sendlist[i];
2361   sendlist[i] = new int*[n];
2362   for (int j = 0; j < n; j++) {
2363     maxsendlist[i][j] = BUFMIN;
2364     memory->create(sendlist[i][j],BUFMIN,"comm:sendlist[i][j]");
2365   }
2366 }

void CommTiled::grow_swap_recv(int i, int n)
{
  delete [] recvproc[i];
  recvproc[i] = new int[n];
  delete [] recvnum[i];
  recvnum[i] = new int[n];

  delete [] size_forward_recv[i];
  size_forward_recv[i] = new int[n];
  delete [] firstrecv[i];
  firstrecv[i] = new int[n];
  delete [] forward_recv_offset[i];
  forward_recv_offset[i] = new int[n];

  delete [] size_reverse_send[i];
  size_reverse_send[i] = new int[n];
}

/* ----------------------------------------------------------------------
   grow info for swap I for multi as ncollections can change
------------------------------------------------------------------------- */

void CommTiled::grow_swap_send_multi(int i, int n)
{
  memory->destroy(sendbox_multi[i]);

  if (ncollections > 0)
    memory->create(sendbox_multi[i],n,ncollections,6,"comm:sendbox_multi");
}

/* ----------------------------------------------------------------------
   deallocate swap info
------------------------------------------------------------------------- */

void CommTiled::deallocate_swap(int n)
{
  delete [] nsendproc;
  delete [] nrecvproc;
  delete [] sendother;
  delete [] recvother;
  delete [] sendself;

  for (int i = 0; i < n; i++) {
    delete [] sendproc[i];
    delete [] recvproc[i];
    delete [] sendnum[i];
    delete [] recvnum[i];
    delete [] size_forward_recv[i];
    delete [] firstrecv[i];
    delete [] size_reverse_send[i];
    delete [] size_reverse_recv[i];
    delete [] forward_recv_offset[i];
    delete [] reverse_recv_offset[i];

    delete [] pbc_flag[i];
    memory->destroy(pbc[i]);
    memory->destroy(sendbox[i]);
    memory->destroy(sendbox_multi[i]);
    memory->destroy(sendbox_multiold[i]);

    delete [] maxsendlist[i];

    for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]);
    delete [] sendlist[i];
  }

  delete [] sendproc;
  delete [] recvproc;
  delete [] sendnum;
  delete [] recvnum;
  delete [] size_forward_recv;
  delete [] firstrecv;
  delete [] size_reverse_send;
  delete [] size_reverse_recv;
  delete [] forward_recv_offset;
  delete [] reverse_recv_offset;

  delete [] pbc_flag;
  delete [] pbc;
  delete [] sendbox;
  delete [] sendbox_multi;
  delete [] sendbox_multiold;
  delete [] maxsendlist;
  delete [] sendlist;

  delete [] requests;

  delete [] nprocmax;

  delete [] nexchproc;
  delete [] nexchprocmax;

  for (int i = 0; i < n/2; i++) {
    delete [] exchproc[i];
    delete [] exchnum[i];
  }

  delete [] exchproc;
  delete [] exchnum;
}

/* ----------------------------------------------------------------------
   return # of bytes of allocated memory
------------------------------------------------------------------------- */

double CommTiled::memory_usage()
{
  double bytes = 0;
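  // NOTE: allocated send/recv buffers and per-swap arrays are not
  //       tallied here yet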
  return bytes;
}