// clang-format off
/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing author for: Emile Maras (CEA, France)
     new options for inter-replica forces, first/last replica treatment
------------------------------------------------------------------------- */
19 
20 #include "fix_neb.h"
21 
22 #include "atom.h"
23 #include "comm.h"
24 #include "compute.h"
25 #include "domain.h"
26 #include "error.h"
27 #include "group.h"
28 #include "math_const.h"
29 #include "memory.h"
30 #include "modify.h"
31 #include "universe.h"
32 #include "update.h"
33 
34 #include <cmath>
35 #include <cstring>
36 
37 using namespace LAMMPS_NS;
38 using namespace FixConst;
39 using namespace MathConst;
40 
41 enum{SINGLE_PROC_DIRECT,SINGLE_PROC_MAP,MULTI_PROC};
42 
43 #define BUFSIZE 8
44 
45 /* ---------------------------------------------------------------------- */
46 
FixNEB(LAMMPS * lmp,int narg,char ** arg)47 FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) :
48   Fix(lmp, narg, arg),
49   id_pe(nullptr), pe(nullptr), nlenall(nullptr), xprev(nullptr), xnext(nullptr),
50   fnext(nullptr), springF(nullptr), tangent(nullptr), xsend(nullptr), xrecv(nullptr),
51   fsend(nullptr), frecv(nullptr), tagsend(nullptr), tagrecv(nullptr),
52   xsendall(nullptr), xrecvall(nullptr), fsendall(nullptr), frecvall(nullptr),
53   tagsendall(nullptr), tagrecvall(nullptr), counts(nullptr),
54   displacements(nullptr)
55 {
56 
57   if (narg < 4) error->all(FLERR,"Illegal fix neb command");
58 
59   kspring = utils::numeric(FLERR,arg[3],false,lmp);
60   if (kspring <= 0.0) error->all(FLERR,"Illegal fix neb command");
61 
62   // optional params
63 
64   NEBLongRange = false;
65   StandardNEB = true;
66   PerpSpring = FreeEndIni = FreeEndFinal = false;
67   FreeEndFinalWithRespToEIni = FinalAndInterWithRespToEIni = false;
68   kspringPerp = 0.0;
69   kspringIni = 1.0;
70   kspringFinal = 1.0;
71 
72   int iarg = 4;
73   while (iarg < narg) {
74     if (strcmp(arg[iarg],"parallel") == 0) {
75       if (iarg+2 > narg) error->all(FLERR,"Illegal fix neb command");
76       if (strcmp(arg[iarg+1],"ideal") == 0) {
77         NEBLongRange = true;
78         StandardNEB = false;
79       } else if (strcmp(arg[iarg+1],"neigh") == 0) {
80         NEBLongRange = false;
81         StandardNEB = true;
82       } else error->all(FLERR,"Illegal fix neb command");
83       iarg += 2;
84 
85     } else if (strcmp(arg[iarg],"perp") == 0) {
86       if (iarg+2 > narg) error->all(FLERR,"Illegal fix neb command");
87       PerpSpring = true;
88       kspringPerp = utils::numeric(FLERR,arg[iarg+1],false,lmp);
89       if (kspringPerp == 0.0) PerpSpring = false;
90       if (kspringPerp < 0.0) error->all(FLERR,"Illegal fix neb command");
91       iarg += 2;
92 
93     } else if (strcmp (arg[iarg],"end") == 0) {
94       if (iarg+3 > narg) error->all(FLERR,"Illegal fix neb command");
95       if (strcmp(arg[iarg+1],"first") == 0) {
96         FreeEndIni = true;
97         kspringIni = utils::numeric(FLERR,arg[iarg+2],false,lmp);
98       } else if (strcmp(arg[iarg+1],"last") == 0) {
99         FreeEndFinal = true;
100         FinalAndInterWithRespToEIni = false;
101         FreeEndFinalWithRespToEIni = false;
102         kspringFinal = utils::numeric(FLERR,arg[iarg+2],false,lmp);
103       } else if (strcmp(arg[iarg+1],"last/efirst") == 0) {
104         FreeEndFinal = false;
105         FinalAndInterWithRespToEIni = false;
106         FreeEndFinalWithRespToEIni = true;
107         kspringFinal = utils::numeric(FLERR,arg[iarg+2],false,lmp);
108       } else if (strcmp(arg[iarg+1],"last/efirst/middle") == 0) {
109         FreeEndFinal = false;
110         FinalAndInterWithRespToEIni = true;
111         FreeEndFinalWithRespToEIni = true;
112         kspringFinal = utils::numeric(FLERR,arg[iarg+2],false,lmp);
113       } else error->all(FLERR,"Illegal fix neb command");
114 
115       iarg += 3;
116 
117     } else error->all(FLERR,"Illegal fix neb command");
118   }
119 
120   // nreplica = number of partitions
121   // ireplica = which world I am in universe
122   // nprocs_universe = # of procs in all replicase
123   // procprev,procnext = root proc in adjacent replicas
124 
125   me = comm->me;
126   nprocs = comm->nprocs;
127 
128   nprocs_universe = universe->nprocs;
129   nreplica = universe->nworlds;
130   ireplica = universe->iworld;
131 
132   if (ireplica > 0) procprev = universe->root_proc[ireplica-1];
133   else procprev = -1;
134   if (ireplica < nreplica-1) procnext = universe->root_proc[ireplica+1];
135   else procnext = -1;
136 
137   uworld = universe->uworld;
138   int *iroots = new int[nreplica];
139   MPI_Group uworldgroup,rootgroup;
140   if (NEBLongRange) {
141     for (int i=0; i<nreplica; i++)
142       iroots[i] = universe->root_proc[i];
143     MPI_Comm_group(uworld, &uworldgroup);
144     MPI_Group_incl(uworldgroup, nreplica, iroots, &rootgroup);
145     MPI_Comm_create(uworld, rootgroup, &rootworld);
146   }
147   delete [] iroots;
148 
149   // create a new compute pe style
150   // id = fix-ID + pe, compute group = all
151 
152   std::string cmd = id + std::string("_pe");
153   id_pe = new char[cmd.size()+1];
154   strcpy(id_pe,cmd.c_str());
155   modify->add_compute(cmd + " all pe");
156 
157   // initialize local storage
158 
159   maxlocal = -1;
160   ntotal = -1;
161 }
162 
163 /* ---------------------------------------------------------------------- */
164 
~FixNEB()165 FixNEB::~FixNEB()
166 {
167   modify->delete_compute(id_pe);
168   delete [] id_pe;
169 
170   memory->destroy(xprev);
171   memory->destroy(xnext);
172   memory->destroy(tangent);
173   memory->destroy(fnext);
174   memory->destroy(springF);
175   memory->destroy(xsend);
176   memory->destroy(xrecv);
177   memory->destroy(fsend);
178   memory->destroy(frecv);
179   memory->destroy(tagsend);
180   memory->destroy(tagrecv);
181 
182   memory->destroy(xsendall);
183   memory->destroy(xrecvall);
184   memory->destroy(fsendall);
185   memory->destroy(frecvall);
186   memory->destroy(tagsendall);
187   memory->destroy(tagrecvall);
188 
189   memory->destroy(counts);
190   memory->destroy(displacements);
191 
192   if (NEBLongRange) {
193     if (rootworld != MPI_COMM_NULL) MPI_Comm_free(&rootworld);
194     memory->destroy(nlenall);
195   }
196 }
197 
198 /* ---------------------------------------------------------------------- */
199 
setmask()200 int FixNEB::setmask()
201 {
202   int mask = 0;
203   mask |= MIN_POST_FORCE;
204   return mask;
205 }
206 
207 /* ---------------------------------------------------------------------- */
208 
/* ----------------------------------------------------------------------
   look up the PE compute, count NEB atoms, select the inter-replica
   communication mode, and allocate communication buffers
------------------------------------------------------------------------- */

void FixNEB::init()
{
  int icompute = modify->find_compute(id_pe);
  if (icompute < 0)
    error->all(FLERR,"Potential energy ID for fix neb does not exist");
  pe = modify->compute[icompute];

  // turn off climbing mode, NEB command turns it on after init()

  rclimber = -1;

  // nebatoms = # of atoms in fix group = atoms with inter-replica forces

  bigint count = group->count(igroup);
  if (count > MAXSMALLINT) error->all(FLERR,"Too many active NEB atoms");
  nebatoms = count;

  // comm mode for inter-replica exchange of coords:
  //   SINGLE_PROC_DIRECT = 1 proc/replica, every atom is a NEB atom,
  //                        no atom sorting -> raw buffer exchange
  //   SINGLE_PROC_MAP    = 1 proc/replica, otherwise -> match by atom ID
  //   MULTI_PROC         = several procs/replica -> gather/bcast via root

  if (nreplica == nprocs_universe &&
      nebatoms == atom->natoms && atom->sortfreq == 0)
    cmode = SINGLE_PROC_DIRECT;
  else if (nreplica == nprocs_universe) cmode = SINGLE_PROC_MAP;
  else cmode = MULTI_PROC;

  // ntotal = total # of atoms in system, NEB atoms or not

  if (atom->natoms > MAXSMALLINT) error->all(FLERR,"Too many atoms for NEB");
  ntotal = atom->natoms;

  if (atom->nmax > maxlocal) reallocate();

  // replica-wide gather buffers are needed only in multi-proc mode;
  // allocate them once (counts == nullptr on first call)

  if ((cmode == MULTI_PROC) && (counts == nullptr)) {
    memory->create(xsendall,ntotal,3,"neb:xsendall");
    memory->create(xrecvall,ntotal,3,"neb:xrecvall");
    memory->create(fsendall,ntotal,3,"neb:fsendall");
    memory->create(frecvall,ntotal,3,"neb:frecvall");
    memory->create(tagsendall,ntotal,"neb:tagsendall");
    memory->create(tagrecvall,ntotal,"neb:tagrecvall");
    memory->create(counts,nprocs,"neb:counts");
    memory->create(displacements,nprocs,"neb:displacements");
  }
}
252 
253 /* ---------------------------------------------------------------------- */
254 
/* ----------------------------------------------------------------------
   apply inter-replica forces once before minimization begins
------------------------------------------------------------------------- */

void FixNEB::min_setup(int vflag)
{
  min_post_force(vflag);

  // trigger potential energy computation on next timestep

  pe->addstep(update->ntimestep+1);
}
263 
264 /* ---------------------------------------------------------------------- */
265 
/* ----------------------------------------------------------------------
   core NEB force modification, called after every minimizer force eval:
   1) exchange replica energies and atom coords/forces with neighbors
   2) build the path tangent per atom (Henkelman/Jonsson energy-weighted)
   3) replace the parallel force component with spring forces
   free-end and long-range ("ideal") variants adjust the end replicas
   and the spring prefactor respectively
------------------------------------------------------------------------- */

void FixNEB::min_post_force(int /*vflag*/)
{
  double vprev,vnext;
  double delxp,delyp,delzp,delxn,delyn,delzn;
  double vIni=0.0;

  // veng = PE of this replica; exchange it with adjacent replicas so
  // vprev/vnext hold the energies of the previous/next image

  vprev = vnext = veng = pe->compute_scalar();

  if (ireplica < nreplica-1 && me == 0)
    MPI_Send(&veng,1,MPI_DOUBLE,procnext,0,uworld);
  if (ireplica > 0 && me == 0)
    MPI_Recv(&vprev,1,MPI_DOUBLE,procprev,0,uworld,MPI_STATUS_IGNORE);

  if (ireplica > 0 && me == 0)
    MPI_Send(&veng,1,MPI_DOUBLE,procprev,0,uworld);
  if (ireplica < nreplica-1 && me == 0)
    MPI_Recv(&vnext,1,MPI_DOUBLE,procnext,0,uworld,MPI_STATUS_IGNORE);

  // with multiple procs per replica, only root exchanged energies:
  // share them within the replica

  if (cmode == MULTI_PROC) {
    MPI_Bcast(&vprev,1,MPI_DOUBLE,0,world);
    MPI_Bcast(&vnext,1,MPI_DOUBLE,0,world);
  }

  // record the free-end reference energy of the last replica on step 0

  if (FreeEndFinal && ireplica == nreplica-1 && (update->ntimestep == 0)) EFinalIni = veng;

  if (ireplica == 0) vIni=veng;

  // when the last replica is referenced to the first image's energy,
  // broadcast the first replica's PE to every proc in the universe

  if (FreeEndFinalWithRespToEIni) {
    if (cmode == SINGLE_PROC_DIRECT || cmode == SINGLE_PROC_MAP) {
      int procFirst;
      procFirst=universe->root_proc[0];
      MPI_Bcast(&vIni,1,MPI_DOUBLE,procFirst,uworld);
    } else {
      if (me == 0)
        MPI_Bcast(&vIni,1,MPI_DOUBLE,0,rootworld);

      MPI_Bcast(&vIni,1,MPI_DOUBLE,0,world);
    }
  }

  // record the free-end reference energy of the first replica on step 0

  if (FreeEndIni && ireplica == 0 && (update->ntimestep == 0)) EIniIni = veng;
  /*  if (FreeEndIni && ireplica == 0) {
    //    if (me == 0 )
      if (update->ntimestep == 0) {
        EIniIni = veng;
        //      if (cmode == MULTI_PROC)
        // MPI_Bcast(&EIniIni,1,MPI_DOUBLE,0,world);
      }
      }*/

  // communicate atoms to/from adjacent replicas to fill xprev,xnext

  inter_replica_comm();

  // trigger potential energy computation on next timestep

  pe->addstep(update->ntimestep+1);

  double **x = atom->x;
  int *mask = atom->mask;
  double dot = 0.0;
  double prefactor = 0.0;

  double **f = atom->f;
  int nlocal = atom->nlocal;

  //calculating separation between images
  // plen/nlen = squared distance to prev/next image (sqrt'd after reduce)
  // tlen      = squared length of the unnormalized tangent
  // dottangrad/dotgrad = alignment diagnostics reported by the NEB command

  plen = 0.0;
  nlen = 0.0;
  double tlen = 0.0;
  double gradnextlen = 0.0;

  dotgrad = gradlen = dotpath = dottangrad = 0.0;

  if (ireplica == nreplica-1) {

    // last replica: tangent points back toward the previous image
    // (only needed when a free-end option is active)

    for (int i = 0; i < nlocal; i++)
      if (mask[i] & groupbit) {
        delxp = x[i][0] - xprev[i][0];
        delyp = x[i][1] - xprev[i][1];
        delzp = x[i][2] - xprev[i][2];
        domain->minimum_image(delxp,delyp,delzp);
        plen += delxp*delxp + delyp*delyp + delzp*delzp;
        dottangrad += delxp* f[i][0]+ delyp*f[i][1]+delzp*f[i][2];
        gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2];
        if (FreeEndFinal||FreeEndFinalWithRespToEIni) {
          tangent[i][0]=delxp;
          tangent[i][1]=delyp;
          tangent[i][2]=delzp;
          tlen += tangent[i][0]*tangent[i][0] +
            tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
          dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] +
            f[i][2]*tangent[i][2];
        }
      }

  } else if (ireplica == 0) {

    // first replica: tangent points forward toward the next image
    // (only needed when the free-end-first option is active)

    for (int i = 0; i < nlocal; i++)
      if (mask[i] & groupbit) {
        delxn = xnext[i][0] - x[i][0];
        delyn = xnext[i][1] - x[i][1];
        delzn = xnext[i][2] - x[i][2];
        domain->minimum_image(delxn,delyn,delzn);
        nlen += delxn*delxn + delyn*delyn + delzn*delzn;
        gradnextlen += fnext[i][0]*fnext[i][0]
          + fnext[i][1]*fnext[i][1] +fnext[i][2] * fnext[i][2];
        dotgrad += f[i][0]*fnext[i][0]
          + f[i][1]*fnext[i][1] + f[i][2]*fnext[i][2];
        dottangrad += delxn*f[i][0]+ delyn*f[i][1] + delzn*f[i][2];
        gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2];
        if (FreeEndIni) {
          tangent[i][0]=delxn;
          tangent[i][1]=delyn;
          tangent[i][2]=delzn;
          tlen += tangent[i][0]*tangent[i][0] +
            tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
          dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] +
            f[i][2]*tangent[i][2];
        }
      }
  } else {

    // not the first or last replica
    // improved tangent estimate: pick the higher-energy side, blend with
    // vmax/vmin weights at extrema (Henkelman & Jonsson, JCP 113, 9978)

    double vmax = MAX(fabs(vnext-veng),fabs(vprev-veng));
    double vmin = MIN(fabs(vnext-veng),fabs(vprev-veng));


    for (int i = 0; i < nlocal; i++)
      if (mask[i] & groupbit) {
        delxp = x[i][0] - xprev[i][0];
        delyp = x[i][1] - xprev[i][1];
        delzp = x[i][2] - xprev[i][2];
        domain->minimum_image(delxp,delyp,delzp);
        plen += delxp*delxp + delyp*delyp + delzp*delzp;

        delxn = xnext[i][0] - x[i][0];
        delyn = xnext[i][1] - x[i][1];
        delzn = xnext[i][2] - x[i][2];
        domain->minimum_image(delxn,delyn,delzn);

        if (vnext > veng && veng > vprev) {
          tangent[i][0] = delxn;
          tangent[i][1] = delyn;
          tangent[i][2] = delzn;
        } else if (vnext < veng && veng < vprev) {
          tangent[i][0] = delxp;
          tangent[i][1] = delyp;
          tangent[i][2] = delzp;
        } else {
          if (vnext > vprev) {
            tangent[i][0] = vmax*delxn + vmin*delxp;
            tangent[i][1] = vmax*delyn + vmin*delyp;
            tangent[i][2] = vmax*delzn + vmin*delzp;
          } else if (vnext < vprev) {
            tangent[i][0] = vmin*delxn + vmax*delxp;
            tangent[i][1] = vmin*delyn + vmax*delyp;
            tangent[i][2] = vmin*delzn + vmax*delzp;
          } else { // vnext == vprev, e.g. for potentials that do not compute an energy
            tangent[i][0] = delxn + delxp;
            tangent[i][1] = delyn + delyp;
            tangent[i][2] = delzn + delzp;
          }
        }

        nlen += delxn*delxn + delyn*delyn + delzn*delzn;
        tlen += tangent[i][0]*tangent[i][0] +
          tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2];
        gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2];
        dotpath += delxp*delxn + delyp*delyn + delzp*delzn;
        dottangrad += tangent[i][0]*f[i][0] +
          tangent[i][1]*f[i][1] + tangent[i][2]*f[i][2];
        gradnextlen += fnext[i][0]*fnext[i][0] +
          fnext[i][1]*fnext[i][1] +fnext[i][2] * fnext[i][2];
        dotgrad += f[i][0]*fnext[i][0] + f[i][1]*fnext[i][1] +
          f[i][2]*fnext[i][2];

        // perpendicular spring force (perp keyword), zero if kspringPerp=0

        springF[i][0] = kspringPerp*(delxn-delxp);
        springF[i][1] = kspringPerp*(delyn-delyp);
        springF[i][2] = kspringPerp*(delzn-delzp);
      }
  }

  // sum all per-atom accumulators across the procs of this replica

  double bufin[BUFSIZE], bufout[BUFSIZE];
  bufin[0] = nlen;
  bufin[1] = plen;
  bufin[2] = tlen;
  bufin[3] = gradlen;
  bufin[4] = gradnextlen;
  bufin[5] = dotpath;
  bufin[6] = dottangrad;
  bufin[7] = dotgrad;
  MPI_Allreduce(bufin,bufout,BUFSIZE,MPI_DOUBLE,MPI_SUM,world);
  nlen = sqrt(bufout[0]);
  plen = sqrt(bufout[1]);
  tlen = sqrt(bufout[2]);
  gradlen = sqrt(bufout[3]);
  gradnextlen = sqrt(bufout[4]);
  dotpath = bufout[5];
  dottangrad = bufout[6];
  dotgrad = bufout[7];

  // normalize tangent vector

  if (tlen > 0.0) {
    double tleninv = 1.0/tlen;
    for (int i = 0; i < nlocal; i++)
      if (mask[i] & groupbit) {
        tangent[i][0] *= tleninv;
        tangent[i][1] *= tleninv;
        tangent[i][2] *= tleninv;
      }
  }

  // first or last replica has no change to forces, just return

  // normalize the alignment diagnostics (which distance is relevant
  // depends on whether this is an end replica)

  if (ireplica > 0 && ireplica < nreplica-1)
    dottangrad = dottangrad/(tlen*gradlen);
  if (ireplica == 0)
    dottangrad = dottangrad/(nlen*gradlen);
  if (ireplica == nreplica-1)
    dottangrad = dottangrad/(plen*gradlen);
  if (ireplica < nreplica-1)
    dotgrad = dotgrad /(gradlen*gradnextlen);

  // free-end first replica: spring force along tangent keeps its energy
  // near EIniIni instead of fully relaxing the end image

  if (FreeEndIni && ireplica == 0) {
    if (tlen > 0.0) {
      double dotall;
      MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
      dot=dotall/tlen;

      if (dot<0) prefactor = -dot - kspringIni*(veng-EIniIni);
      else prefactor = -dot + kspringIni*(veng-EIniIni);

      for (int i = 0; i < nlocal; i++)
        if (mask[i] & groupbit) {
          f[i][0] += prefactor *tangent[i][0];
          f[i][1] += prefactor *tangent[i][1];
          f[i][2] += prefactor *tangent[i][2];
        }
    }
  }

  // free-end last replica, referenced to its own initial energy

  if (FreeEndFinal && ireplica == nreplica -1) {
    if (tlen > 0.0) {
      double dotall;
      MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
      dot=dotall/tlen;

      if (veng<EFinalIni) {
        if (dot<0) prefactor = -dot - kspringFinal*(veng-EFinalIni);
        else prefactor = -dot + kspringFinal*(veng-EFinalIni);
      }
      for (int i = 0; i < nlocal; i++)
        if (mask[i] & groupbit) {
          f[i][0] += prefactor *tangent[i][0];
          f[i][1] += prefactor *tangent[i][1];
          f[i][2] += prefactor *tangent[i][2];
        }
    }
  }

  // free-end last replica, referenced to the first replica's energy

  if (FreeEndFinalWithRespToEIni&&ireplica == nreplica -1) {
    if (tlen > 0.0) {
      double dotall;
      MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
      dot=dotall/tlen;
      if (veng<vIni) {
        if (dot<0) prefactor = -dot - kspringFinal*(veng-vIni);
        else prefactor = -dot + kspringFinal*(veng-vIni);
      }
      for (int i = 0; i < nlocal; i++)
        if (mask[i] & groupbit) {
          f[i][0] += prefactor *tangent[i][0];
          f[i][1] += prefactor *tangent[i][1];
          f[i][2] += prefactor *tangent[i][2];
        }
    }
  }

  // "parallel ideal" variant: gather all inter-image distances so each
  // replica can compute its ideal position along the path (equal spacing,
  // or piecewise-equal spacing around a climbing image)

  double lentot = 0;
  double meanDist,idealPos,lenuntilIm,lenuntilClimber;
  lenuntilClimber=0;
  if (NEBLongRange) {
    if (cmode == SINGLE_PROC_DIRECT || cmode == SINGLE_PROC_MAP) {
      MPI_Allgather(&nlen,1,MPI_DOUBLE,&nlenall[0],1,MPI_DOUBLE,uworld);
    } else {
      if (me == 0)
        MPI_Allgather(&nlen,1,MPI_DOUBLE,&nlenall[0],1,MPI_DOUBLE,rootworld);
      MPI_Bcast(nlenall,nreplica,MPI_DOUBLE,0,world);
    }

    lenuntilIm = 0;
    for (int i = 0; i < ireplica; i++)
      lenuntilIm += nlenall[i];

    for (int i = 0; i < nreplica; i++)
      lentot += nlenall[i];

    meanDist = lentot/(nreplica -1);

    if (rclimber>0) {
      for (int i = 0; i < rclimber; i++)
        lenuntilClimber += nlenall[i];
      double meanDistBeforeClimber = lenuntilClimber/rclimber;
      double meanDistAfterClimber =
        (lentot-lenuntilClimber)/(nreplica-rclimber-1);
      if (ireplica<rclimber)
        idealPos = ireplica * meanDistBeforeClimber;
      else
        idealPos = lenuntilClimber+ (ireplica-rclimber)*meanDistAfterClimber;
    } else idealPos = ireplica * meanDist;
  }

  // end replicas are done: their force modification (if any) was the
  // free-end treatment above

  if (ireplica == 0 || ireplica == nreplica-1) return ;

  // AngularContr smoothly switches on the perpendicular spring
  // contribution as the path kinks (dotpath = cos of path angle)

  double AngularContr;
  dotpath = dotpath/(plen*nlen);
  AngularContr = 0.5 *(1+cos(MY_PI * dotpath));

  double dotSpringTangent;
  dotSpringTangent=0;

  for (int i = 0; i < nlocal; i++) {
    if (mask[i] & groupbit) {
      dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] +
        f[i][2]*tangent[i][2];
      dotSpringTangent += springF[i][0]*tangent[i][0] +
        springF[i][1]*tangent[i][1] + springF[i][2]*tangent[i][2];}
  }

  double dotSpringTangentall;
  MPI_Allreduce(&dotSpringTangent,&dotSpringTangentall,1,
                MPI_DOUBLE,MPI_SUM,world);
  dotSpringTangent=dotSpringTangentall;
  double dotall;
  MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
  dot=dotall;

  // climbing image: invert the parallel force component (prefactor -2*dot)
  // otherwise: replace it with the NEB spring force along the tangent

  if (ireplica == rclimber) prefactor = -2.0*dot;
  else {
    if (NEBLongRange) {
      prefactor = -dot - kspring*(lenuntilIm-idealPos)/(2*meanDist);
    } else if (StandardNEB) {
      prefactor = -dot + kspring*(nlen-plen);
    }

    // "end last/efirst/middle": intermediate images below the first
    // image's energy keep only the spring force

    if (FinalAndInterWithRespToEIni&& veng<vIni) {
      for (int i = 0; i < nlocal; i++)
        if (mask[i] & groupbit) {
          f[i][0] = 0;
          f[i][1] = 0;
          f[i][2] = 0;
        }
      prefactor =  kspring*(nlen-plen);
      AngularContr=0;
    }
  }

  // apply tangent spring force plus angular-weighted perpendicular spring

  for (int i = 0; i < nlocal; i++)
    if (mask[i] & groupbit) {
      f[i][0] += prefactor*tangent[i][0] +
        AngularContr*(springF[i][0] - dotSpringTangent*tangent[i][0]);
      f[i][1] += prefactor*tangent[i][1] +
        AngularContr*(springF[i][1] - dotSpringTangent*tangent[i][1]);
      f[i][2] += prefactor*tangent[i][2] +
        AngularContr*(springF[i][2] - dotSpringTangent*tangent[i][2]);
    }
}
636 
637 /* ----------------------------------------------------------------------
638    send/recv NEB atoms to/from adjacent replicas
639    received atoms matching my local atoms are stored in xprev,xnext
640    replicas 0 and N-1 send but do not receive any atoms
641 ------------------------------------------------------------------------- */
642 
643 
inter_replica_comm()644 void FixNEB::inter_replica_comm()
645 {
646   int i,m;
647   MPI_Request request;
648   MPI_Request requests[2];
649   MPI_Status statuses[2];
650 
651   // reallocate memory if necessary
652 
653   if (atom->nmax > maxlocal) reallocate();
654 
655   double **x = atom->x;
656   double **f = atom->f;
657   tagint *tag = atom->tag;
658   int *mask = atom->mask;
659   int nlocal = atom->nlocal;
660 
661   // -----------------------------------------------------
662   // 3 cases: two for single proc per replica
663   //          one for multiple procs per replica
664   // -----------------------------------------------------
665 
666   // single proc per replica
667   // all atoms are NEB atoms and no atom sorting
668   // direct comm of x -> xprev and x -> xnext
669 
670   if (cmode == SINGLE_PROC_DIRECT) {
671     if (ireplica > 0)
672       MPI_Irecv(xprev[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld,&request);
673     if (ireplica < nreplica-1)
674       MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld);
675     if (ireplica > 0) MPI_Wait(&request,MPI_STATUS_IGNORE);
676     if (ireplica < nreplica-1)
677       MPI_Irecv(xnext[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld,&request);
678     if (ireplica > 0)
679       MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld);
680     if (ireplica < nreplica-1) MPI_Wait(&request,MPI_STATUS_IGNORE);
681 
682     if (ireplica < nreplica-1)
683       MPI_Irecv(fnext[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld,&request);
684     if (ireplica > 0)
685       MPI_Send(f[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld);
686     if (ireplica < nreplica-1) MPI_Wait(&request,MPI_STATUS_IGNORE);
687 
688     return;
689   }
690 
691   // single proc per replica
692   // but only some atoms are NEB atoms or atom sorting is enabled
693   // send atom IDs and coords of only NEB atoms to prev/next proc
694   // recv procs use atom->map() to match received coords to owned atoms
695 
696   if (cmode == SINGLE_PROC_MAP) {
697     m = 0;
698     for (i = 0; i < nlocal; i++)
699       if (mask[i] & groupbit) {
700         tagsend[m] = tag[i];
701         xsend[m][0] = x[i][0];
702         xsend[m][1] = x[i][1];
703         xsend[m][2] = x[i][2];
704         fsend[m][0] = f[i][0];
705         fsend[m][1] = f[i][1];
706         fsend[m][2] = f[i][2];
707         m++;
708       }
709 
710     if (ireplica > 0) {
711       MPI_Irecv(xrecv[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld,&requests[0]);
712       MPI_Irecv(tagrecv,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld,&requests[1]);
713     }
714     if (ireplica < nreplica-1) {
715       MPI_Send(xsend[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld);
716       MPI_Send(tagsend,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld);
717     }
718 
719     if (ireplica > 0) {
720       MPI_Waitall(2,requests,statuses);
721       for (i = 0; i < nebatoms; i++) {
722         m = atom->map(tagrecv[i]);
723         xprev[m][0] = xrecv[i][0];
724         xprev[m][1] = xrecv[i][1];
725         xprev[m][2] = xrecv[i][2];
726       }
727     }
728     if (ireplica < nreplica-1) {
729       MPI_Irecv(xrecv[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]);
730       MPI_Irecv(frecv[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]);
731       MPI_Irecv(tagrecv,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld,&requests[1]);
732     }
733     if (ireplica > 0) {
734       MPI_Send(xsend[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld);
735       MPI_Send(fsend[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld);
736       MPI_Send(tagsend,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld);
737     }
738 
739     if (ireplica < nreplica-1) {
740       MPI_Waitall(2,requests,statuses);
741       for (i = 0; i < nebatoms; i++) {
742         m = atom->map(tagrecv[i]);
743         xnext[m][0] = xrecv[i][0];
744         xnext[m][1] = xrecv[i][1];
745         xnext[m][2] = xrecv[i][2];
746         fnext[m][0] = frecv[i][0];
747         fnext[m][1] = frecv[i][1];
748         fnext[m][2] = frecv[i][2];
749       }
750     }
751 
752     return;
753   }
754 
755   // multiple procs per replica
756   // MPI_Gather all coords and atom IDs to root proc of each replica
757   // send to root of adjacent replicas
758   // bcast within each replica
759   // each proc extracts info for atoms it owns via atom->map()
760 
761   m = 0;
762   for (i = 0; i < nlocal; i++)
763     if (mask[i] & groupbit) {
764       tagsend[m] = tag[i];
765       xsend[m][0] = x[i][0];
766       xsend[m][1] = x[i][1];
767       xsend[m][2] = x[i][2];
768       fsend[m][0] = f[i][0];
769       fsend[m][1] = f[i][1];
770       fsend[m][2] = f[i][2];
771       m++;
772     }
773 
774   MPI_Gather(&m,1,MPI_INT,counts,1,MPI_INT,0,world);
775   displacements[0] = 0;
776   for (i = 0; i < nprocs-1; i++)
777     displacements[i+1] = displacements[i] + counts[i];
778   MPI_Gatherv(tagsend,m,MPI_LMP_TAGINT,
779               tagsendall,counts,displacements,MPI_LMP_TAGINT,0,world);
780   for (i = 0; i < nprocs; i++) counts[i] *= 3;
781   for (i = 0; i < nprocs-1; i++)
782     displacements[i+1] = displacements[i] + counts[i];
783   if (xsend) {
784     MPI_Gatherv(xsend[0],3*m,MPI_DOUBLE,
785                 xsendall[0],counts,displacements,MPI_DOUBLE,0,world);
786     MPI_Gatherv(fsend[0],3*m,MPI_DOUBLE,
787                 fsendall[0],counts,displacements,MPI_DOUBLE,0,world);
788   } else {
789     MPI_Gatherv(nullptr,3*m,MPI_DOUBLE,
790                 xsendall[0],counts,displacements,MPI_DOUBLE,0,world);
791     MPI_Gatherv(nullptr,3*m,MPI_DOUBLE,
792                 fsendall[0],counts,displacements,MPI_DOUBLE,0,world);
793   }
794 
795   if (ireplica > 0 && me == 0) {
796     MPI_Irecv(xrecvall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld,&requests[0]);
797     MPI_Irecv(tagrecvall,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld,
798               &requests[1]);
799   }
800   if (ireplica < nreplica-1 && me == 0) {
801     MPI_Send(xsendall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld);
802     MPI_Send(tagsendall,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld);
803   }
804 
805   if (ireplica > 0) {
806     if (me == 0) MPI_Waitall(2,requests,statuses);
807 
808     MPI_Bcast(tagrecvall,nebatoms,MPI_INT,0,world);
809     MPI_Bcast(xrecvall[0],3*nebatoms,MPI_DOUBLE,0,world);
810 
811     for (i = 0; i < nebatoms; i++) {
812       m = atom->map(tagrecvall[i]);
813       if (m < 0 || m >= nlocal) continue;
814       xprev[m][0] = xrecvall[i][0];
815       xprev[m][1] = xrecvall[i][1];
816       xprev[m][2] = xrecvall[i][2];
817     }
818   }
819 
820   if (ireplica < nreplica-1 && me == 0) {
821     MPI_Irecv(xrecvall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]);
822     MPI_Irecv(frecvall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]);
823     MPI_Irecv(tagrecvall,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld,
824               &requests[1]);
825   }
826   if (ireplica > 0 && me == 0) {
827     MPI_Send(xsendall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld);
828     MPI_Send(fsendall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld);
829     MPI_Send(tagsendall,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld);
830   }
831 
832   if (ireplica < nreplica-1) {
833     if (me == 0) MPI_Waitall(2,requests,statuses);
834 
835     MPI_Bcast(tagrecvall,nebatoms,MPI_INT,0,world);
836     MPI_Bcast(xrecvall[0],3*nebatoms,MPI_DOUBLE,0,world);
837     MPI_Bcast(frecvall[0],3*nebatoms,MPI_DOUBLE,0,world);
838 
839     for (i = 0; i < nebatoms; i++) {
840       m = atom->map(tagrecvall[i]);
841       if (m < 0 || m >= nlocal) continue;
842       xnext[m][0] = xrecvall[i][0];
843       xnext[m][1] = xrecvall[i][1];
844       xnext[m][2] = xrecvall[i][2];
845       fnext[m][0] = frecvall[i][0];
846       fnext[m][1] = frecvall[i][1];
847       fnext[m][2] = frecvall[i][2];
848     }
849   }
850 }
851 
852 /* ----------------------------------------------------------------------
853    reallocate xprev,xnext,tangent arrays if necessary
854    reallocate communication arrays if necessary
855 ------------------------------------------------------------------------- */
856 
/* ----------------------------------------------------------------------
   reallocate xprev,xnext,tangent arrays if necessary
   reallocate communication arrays if necessary
   called whenever atom->nmax grows past the current capacity (maxlocal)
------------------------------------------------------------------------- */

void FixNEB::reallocate()
{
  maxlocal = atom->nmax;

  // per-atom work arrays, sized to the max # of owned atoms

  memory->destroy(xprev);
  memory->destroy(xnext);
  memory->destroy(tangent);
  memory->destroy(fnext);
  memory->destroy(springF);

  memory->create(xprev,maxlocal,3,"neb:xprev");
  memory->create(xnext,maxlocal,3,"neb:xnext");
  memory->create(tangent,maxlocal,3,"neb:tangent");
  memory->create(fnext,maxlocal,3,"neb:fnext");
  memory->create(springF,maxlocal,3,"neb:springF");

  // send/recv buffers are only used when atoms must be matched by ID
  // (SINGLE_PROC_MAP and MULTI_PROC modes)

  if (cmode != SINGLE_PROC_DIRECT) {
    memory->destroy(xsend);
    memory->destroy(fsend);
    memory->destroy(xrecv);
    memory->destroy(frecv);
    memory->destroy(tagsend);
    memory->destroy(tagrecv);
    memory->create(xsend,maxlocal,3,"neb:xsend");
    memory->create(fsend,maxlocal,3,"neb:fsend");
    memory->create(xrecv,maxlocal,3,"neb:xrecv");
    memory->create(frecv,maxlocal,3,"neb:frecv");
    memory->create(tagsend,maxlocal,"neb:tagsend");
    memory->create(tagrecv,maxlocal,"neb:tagrecv");
  }

  // per-replica inter-image distances for the "parallel ideal" variant

  if (NEBLongRange) {
    memory->destroy(nlenall);
    memory->create(nlenall,nreplica,"neb:nlenall");
  }
}
893