1 // clang-format off
2 /* ----------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing authors: Paul Crozier, Stan Moore, Stephen Bond, (all SNL)
17 ------------------------------------------------------------------------- */
18 
19 #include "msm.h"
20 
21 #include "atom.h"
22 #include "comm.h"
23 #include "domain.h"
24 #include "error.h"
25 #include "force.h"
26 #include "gridcomm.h"
27 #include "math_const.h"
28 #include "memory.h"
29 #include "neighbor.h"
30 #include "pair.h"
31 
32 #include <cstring>
33 #include <cmath>
34 
35 using namespace LAMMPS_NS;
36 using namespace MathConst;
37 
38 #define MAX_LEVELS 10
39 #define OFFSET 16384
40 #define SMALL 0.00001
41 
42 enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
43 enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
44 
45 /* ---------------------------------------------------------------------- */
46 
MSM(LAMMPS * lmp)47 MSM::MSM(LAMMPS *lmp)
48   : KSpace(lmp),
49     factors(nullptr), delxinv(nullptr), delyinv(nullptr), delzinv(nullptr), nx_msm(nullptr),
50     ny_msm(nullptr), nz_msm(nullptr), nxlo_in(nullptr), nylo_in(nullptr), nzlo_in(nullptr),
51     nxhi_in(nullptr), nyhi_in(nullptr), nzhi_in(nullptr), nxlo_out(nullptr), nylo_out(nullptr),
52     nzlo_out(nullptr), nxhi_out(nullptr), nyhi_out(nullptr), nzhi_out(nullptr), ngrid(nullptr),
53     active_flag(nullptr), alpha(nullptr), betax(nullptr), betay(nullptr), betaz(nullptr),
54     peratom_allocate_flag(0),levels(0),world_levels(nullptr),qgrid(nullptr),egrid(nullptr),
55     v0grid(nullptr), v1grid(nullptr),v2grid(nullptr),v3grid(nullptr),v4grid(nullptr),v5grid(nullptr),
56     g_direct(nullptr),v0_direct(nullptr),v1_direct(nullptr),v2_direct(nullptr),v3_direct(nullptr),
57     v4_direct(nullptr),v5_direct(nullptr),g_direct_top(nullptr),v0_direct_top(nullptr),
58     v1_direct_top(nullptr),v2_direct_top(nullptr),v3_direct_top(nullptr),v4_direct_top(nullptr),
59     v5_direct_top(nullptr),phi1d(nullptr),dphi1d(nullptr),procneigh_levels(nullptr),gcall(nullptr),
60     gc(nullptr),gcall_buf1(nullptr),gcall_buf2(nullptr),gc_buf1(nullptr),gc_buf2(nullptr),
61     ngc_buf1(nullptr),ngc_buf2(nullptr),part2grid(nullptr),boxlo(nullptr)
62 {
63   msmflag = 1;
64 
65   nfactors = 1;
66   factors = new int[nfactors];
67   factors[0] = 2;
68 
69   MPI_Comm_rank(world,&me);
70 
71   phi1d = dphi1d = nullptr;
72 
73   nmax = 0;
74   part2grid = nullptr;
75 
76   g_direct = nullptr;
77   g_direct_top = nullptr;
78 
79   v0_direct = v1_direct = v2_direct = nullptr;
80   v3_direct = v4_direct = v5_direct = nullptr;
81 
82   v0_direct_top = v1_direct_top = v2_direct_top = nullptr;
83   v3_direct_top = v4_direct_top = v5_direct_top = nullptr;
84 
85   ngrid = nullptr;
86 
87   alpha = betax = betay = betaz = nullptr;
88   nx_msm = ny_msm = nz_msm = nullptr;
89   nxlo_in = nylo_in = nzlo_in = nullptr;
90   nxhi_in = nyhi_in = nzhi_in = nullptr;
91   nxlo_out = nylo_out = nzlo_out = nullptr;
92   nxhi_out = nyhi_out = nzhi_out = nullptr;
93   delxinv = delyinv = delzinv = nullptr;
94   qgrid = nullptr;
95   egrid = nullptr;
96   v0grid = v1grid = v2grid = v3grid = v4grid = v5grid = nullptr;
97 
98   peratom_allocate_flag = 0;
99   scalar_pressure_flag = 1;
100   warn_nonneutral = 0;
101 
102   order = 10;
103 }
104 
105 /* ---------------------------------------------------------------------- */
106 
settings(int narg,char ** arg)107 void MSM::settings(int narg, char **arg)
108 {
109   if (narg < 1) error->all(FLERR,"Illegal kspace_style msm command");
110   accuracy_relative = fabs(utils::numeric(FLERR,arg[0],false,lmp));
111 }
112 
113 /* ----------------------------------------------------------------------
114    free all memory
115 ------------------------------------------------------------------------- */
116 
~MSM()117 MSM::~MSM()
118 {
119   delete [] factors;
120   deallocate();
121   if (peratom_allocate_flag) deallocate_peratom();
122   deallocate_levels();
123   memory->destroy(part2grid);
124   memory->destroy(g_direct);
125   memory->destroy(g_direct_top);
126   memory->destroy(v0_direct);
127   memory->destroy(v1_direct);
128   memory->destroy(v2_direct);
129   memory->destroy(v3_direct);
130   memory->destroy(v4_direct);
131   memory->destroy(v5_direct);
132   memory->destroy(v0_direct_top);
133   memory->destroy(v1_direct_top);
134   memory->destroy(v2_direct_top);
135   memory->destroy(v3_direct_top);
136   memory->destroy(v4_direct_top);
137   memory->destroy(v5_direct_top);
138 }
139 
140 /* ----------------------------------------------------------------------
141    called once before run
142 ------------------------------------------------------------------------- */
143 
init()144 void MSM::init()
145 {
146   if (me == 0) utils::logmesg(lmp,"MSM initialization ...\n");
147 
148   // error check
149 
150   triclinic_check();
151   if (domain->dimension == 2)
152     error->all(FLERR,"Cannot (yet) use MSM with 2d simulation");
153   if (comm->style != 0)
154     error->universe_all(FLERR,"MSM can only currently be used with "
155                         "comm_style brick");
156 
157   if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
158 
159   if ((slabflag == 1) && (me == 0))
160     error->warning(FLERR,"Slab correction not needed for MSM");
161 
162   if ((order < 4) || (order > 10) || (order%2 != 0))
163     error->all(FLERR,"MSM order must be 4, 6, 8, or 10");
164 
165   if (sizeof(FFT_SCALAR) != 8)
166     error->all(FLERR,"Cannot (yet) use single precision with MSM "
167                "(remove -DFFT_SINGLE from Makefile and re-compile)");
168 
169   // compute two charge force
170 
171   two_charge();
172 
173   // extract short-range Coulombic cutoff from pair style
174 
175   triclinic = domain->triclinic;
176   pair_check();
177 
178   int itmp;
179   double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
180   if (p_cutoff == nullptr)
181     error->all(FLERR,"KSpace style is incompatible with Pair style");
182   cutoff = *p_cutoff;
183 
184   // compute qsum & qsqsum and error if not charge-neutral
185 
186   scale = 1.0;
187   qqrd2e = force->qqrd2e;
188   qsum_qsq();
189   natoms_original = atom->natoms;
190 
191   // set accuracy (force units) from accuracy_relative or accuracy_absolute
192 
193   if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
194   else accuracy = accuracy_relative * two_charge_force;
195 
196   // setup MSM grid resolution
197 
198   set_grid_global();
199   setup();
200 
201   double estimated_error = estimate_total_error();
202 
203   // output grid stats
204 
205   int ngrid_max;
206   MPI_Allreduce(&ngrid[0],&ngrid_max,1,MPI_INT,MPI_MAX,world);
207 
208   if (me == 0) {
209     std::string mesg = fmt::format("  3d grid size/proc = {}\n", ngrid_max);
210     mesg += fmt::format("  estimated absolute RMS force accuracy = {:.8}\n",
211                         estimated_error);
212     mesg += fmt::format("  estimated relative force accuracy = {:.8}\n",
213                         estimated_error/two_charge_force);
214     mesg += fmt::format("  grid = {} {} {}\n",nx_msm[0],ny_msm[0],nz_msm[0]);
215     mesg += fmt::format("  order = {}\n",order);
216     utils::logmesg(lmp,mesg);
217   }
218 }
219 
220 /* ----------------------------------------------------------------------
221    estimate 1d grid RMS force error for MSM
222 ------------------------------------------------------------------------- */
223 
estimate_1d_error(double h,double prd)224 double MSM::estimate_1d_error(double h, double prd)
225 {
226   double a = cutoff;
227   int p = order - 1;
228 
229   double Mp,cprime,error_scaling;
230   Mp = cprime = error_scaling = 1;
231   // Mp values from Table 5.1 of Hardy's thesis
232   // cprime values from equation 4.17 of Hardy's thesis
233   // error scaling from empirical fitting to convert to rms force errors
234   if (p == 3) {
235     Mp = 9;
236     cprime = 1.0/6.0;
237     error_scaling = 0.39189561;
238   } else if (p == 5) {
239     Mp = 825;
240     cprime = 1.0/30.0;
241     error_scaling = 0.150829428;
242   } else if (p == 7) {
243     Mp = 130095;
244     cprime = 1.0/140.0;
245     error_scaling = 0.049632967;
246   } else if (p == 9) {
247     Mp = 34096545;
248     cprime = 1.0/630.0;
249     error_scaling = 0.013520855;
250   } else {
251     error->all(FLERR,"MSM order must be 4, 6, 8, or 10");
252   }
253 
254   // equation 4.1 from Hardy's thesis
255   C_p = 4.0*cprime*Mp/3.0;
256 
257   // use empirical parameters to convert to rms force errors
258   C_p *= error_scaling;
259 
260   // equation 3.197 from Hardy's thesis
261   double error_1d = C_p*pow(h,(p-1))/pow(a,(p+1));
262 
263   // include dependency of error on other terms
264   error_1d *= q2*a/(prd*sqrt(double(atom->natoms)));
265 
266   return error_1d;
267 }
268 
269 /* ----------------------------------------------------------------------
270    estimate 3d grid RMS force error
271 ------------------------------------------------------------------------- */
272 
estimate_3d_error()273 double MSM::estimate_3d_error()
274 {
275   double xprd = domain->xprd;
276   double yprd = domain->yprd;
277   double zprd = domain->zprd;
278   double error_x = estimate_1d_error(h_x,xprd);
279   double error_y = estimate_1d_error(h_y,yprd);
280   double error_z = estimate_1d_error(h_z,zprd);
281   double error_3d =
282    sqrt(error_x*error_x + error_y*error_y + error_z*error_z) / sqrt(3.0);
283   return error_3d;
284 }
285 
286 /* ----------------------------------------------------------------------
287    estimate total RMS force error
288 ------------------------------------------------------------------------- */
289 
estimate_total_error()290 double MSM::estimate_total_error()
291 {
292   double xprd = domain->xprd;
293   double yprd = domain->yprd;
294   double zprd = domain->zprd;
295   bigint natoms = atom->natoms;
296 
297   double grid_error = estimate_3d_error();
298   double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
299   double short_range_error = 0.0;
300   double table_error =
301    estimate_table_accuracy(q2_over_sqrt,short_range_error);
302   double estimated_total_error = sqrt(grid_error*grid_error +
303    short_range_error*short_range_error + table_error*table_error);
304 
305   return estimated_total_error;
306 }
307 
308 /* ----------------------------------------------------------------------
309    adjust MSM coeffs, called initially and whenever volume has changed
310 ------------------------------------------------------------------------- */
311 
setup()312 void MSM::setup()
313 {
314   double *prd;
315   double a = cutoff;
316 
317   // volume-dependent factors
318 
319   prd = domain->prd;
320 
321   double xprd = prd[0];
322   double yprd = prd[1];
323   double zprd = prd[2];
324   volume = xprd * yprd * zprd;
325 
326   // loop over grid levels and compute grid spacing
327 
328   for (int n=0; n<levels; n++) {
329     if (triclinic == 0) {
330       delxinv[n] = nx_msm[n]/xprd;
331       delyinv[n] = ny_msm[n]/yprd;
332       delzinv[n] = nz_msm[n]/zprd;
333     } else { // use lamda (0-1) coordinates
334       delxinv[n] = nx_msm[n];
335       delyinv[n] = ny_msm[n];
336       delzinv[n] = nz_msm[n];
337     }
338   }
339 
340   double ax = a;
341   double ay = a;
342   double az = a;
343 
344   // transform the interaction sphere in box coords to an
345   // ellipsoid in lamda (0-1) coords to
346   // get the direct sum interaction limits for a triclinic system
347 
348   if (triclinic) {
349     double tmp[3];
350     kspacebbox(a,&tmp[0]);
351     ax = tmp[0];
352     ay = tmp[1];
353     az = tmp[2];
354   }
355 
356   // direct sum interaction limits
357 
358   nxhi_direct = static_cast<int> (2.0*ax*delxinv[0]);
359   nxlo_direct = -nxhi_direct;
360   nyhi_direct = static_cast<int> (2.0*ay*delyinv[0]);
361   nylo_direct = -nyhi_direct;
362   nzhi_direct = static_cast<int> (2.0*az*delzinv[0]);
363   nzlo_direct = -nzhi_direct;
364 
365   nmax_direct = 8*(nxhi_direct+1)*(nyhi_direct+1)*(nzhi_direct+1);
366 
367   deallocate();
368   if (peratom_allocate_flag) deallocate_peratom();
369 
370   // compute direct sum interaction weights
371 
372   if (!peratom_allocate_flag) { // Timestep 0
373     get_g_direct();
374     get_virial_direct();
375     if (domain->nonperiodic) {
376       get_g_direct_top(levels-1);
377       get_virial_direct_top(levels-1);
378     }
379   } else {
380     get_g_direct();
381     if (domain->nonperiodic) get_g_direct_top(levels-1);
382     if (vflag_either && !scalar_pressure_flag) {
383       get_virial_direct();
384       if (domain->nonperiodic) get_virial_direct_top(levels-1);
385     }
386   }
387 
388   if (!triclinic)
389     boxlo = domain->boxlo;
390   else
391     boxlo = domain->boxlo_lamda;
392 
393   // ghost grid points depend on direct sum interaction limits,
394   // so need to re-compute local grid
395 
396   set_grid_local();
397 
398   // allocate K-space dependent memory
399   // don't invoke allocate_peratom(), compute() will allocate when needed
400 
401   allocate();
402 }
403 
404 /* ----------------------------------------------------------------------
405    compute the MSM long-range force, energy, virial
406 ------------------------------------------------------------------------- */
407 
compute(int eflag,int vflag)408 void MSM::compute(int eflag, int vflag)
409 {
410   int i,j;
411 
412   // set energy/virial flags
413 
414   ev_init(eflag,vflag);
415 
416   if (scalar_pressure_flag && vflag_either) {
417     if (vflag_atom)
418       error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' to obtain "
419         "per-atom virial with kspace_style MSM");
420 
421     // must switch on global energy computation if not already on
422 
423     if (eflag == 0 || eflag == 2) {
424       eflag++;
425       ev_setup(eflag,vflag);
426     }
427   }
428 
429   // if atom count has changed, update qsum and qsqsum
430 
431   if (atom->natoms != natoms_original) {
432     qsum_qsq();
433     natoms_original = atom->natoms;
434   }
435 
436   // return if there are no charges
437 
438   if (qsqsum == 0.0) return;
439 
440   // invoke allocate_peratom() if needed for first time
441 
442   if (vflag_atom && !peratom_allocate_flag) allocate_peratom();
443 
444   // convert atoms from box to lamda coords
445 
446   if (triclinic)
447     domain->x2lamda(atom->nlocal);
448 
449   // extend size of per-atom arrays if necessary
450 
451   if (atom->nmax > nmax) {
452     memory->destroy(part2grid);
453     nmax = atom->nmax;
454     memory->create(part2grid,nmax,3,"msm:part2grid");
455   }
456 
457   // find grid points for all my particles
458   // map my particle charge onto my local 3d density grid (aninterpolation)
459 
460   particle_map();
461   make_rho();
462 
463   // all procs reverse communicate charge density values from
464   // their ghost grid points
465   // to fully sum contribution in their 3d grid
466 
467   current_level = 0;
468   gcall->reverse_comm(GridComm::KSPACE,this,1,sizeof(double),
469                       REVERSE_RHO,gcall_buf1,gcall_buf2,MPI_DOUBLE);
470 
471   // forward communicate charge density values to fill ghost grid points
472   // compute direct sum interaction and then restrict to coarser grid
473 
474   for (int n=0; n<=levels-2; n++) {
475     if (!active_flag[n]) continue;
476     current_level = n;
477     gc[n]->forward_comm(GridComm::KSPACE,this,1,sizeof(double),
478                         FORWARD_RHO,gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
479     direct(n);
480     restriction(n);
481   }
482 
483   // compute direct interation for top grid level for non-periodic
484   //   and for second from top grid level for periodic
485 
486   if (active_flag[levels-1]) {
487     if (domain->nonperiodic) {
488       current_level = levels-1;
489       gc[levels-1]->
490         forward_comm(GridComm::KSPACE,this,1,sizeof(double),
491                      FORWARD_RHO,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
492       direct_top(levels-1);
493       gc[levels-1]->
494         reverse_comm(GridComm::KSPACE,this,1,sizeof(double),
495                      REVERSE_AD,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
496       if (vflag_atom)
497         gc[levels-1]->
498           reverse_comm(GridComm::KSPACE,this,6,sizeof(double),
499                        REVERSE_AD_PERATOM,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
500 
501     } else {
502       // Here using MPI_Allreduce is cheaper than using commgrid
503       grid_swap_forward(levels-1,qgrid[levels-1]);
504       direct(levels-1);
505       grid_swap_reverse(levels-1,egrid[levels-1]);
506       current_level = levels-1;
507       if (vflag_atom)
508         gc[levels-1]->
509           reverse_comm(GridComm::KSPACE,this,6,sizeof(double),
510                        REVERSE_AD_PERATOM,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
511     }
512   }
513 
514   // prolongate energy/virial from coarser grid to finer grid
515   // reverse communicate from ghost grid points to get full sum
516 
517   for (int n=levels-2; n>=0; n--) {
518     if (!active_flag[n]) continue;
519     prolongation(n);
520 
521     current_level = n;
522     gc[n]->reverse_comm(GridComm::KSPACE,this,1,sizeof(double),
523                         REVERSE_AD,gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
524 
525     // extra per-atom virial communication
526 
527     if (vflag_atom)
528       gc[n]->reverse_comm(GridComm::KSPACE,this,6,sizeof(double),
529                           REVERSE_AD_PERATOM,gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
530   }
531 
532   // all procs communicate E-field values
533   // to fill ghost cells surrounding their 3d bricks
534 
535   current_level = 0;
536   gcall->forward_comm(GridComm::KSPACE,this,1,sizeof(double),
537                       FORWARD_AD,gcall_buf1,gcall_buf2,MPI_DOUBLE);
538 
539   // extra per-atom energy/virial communication
540 
541   if (vflag_atom)
542     gcall->forward_comm(GridComm::KSPACE,this,6,sizeof(double),
543                         FORWARD_AD_PERATOM,gcall_buf1,gcall_buf2,MPI_DOUBLE);
544 
545   // calculate the force on my particles (interpolation)
546 
547   fieldforce();
548 
549   // calculate the per-atom energy/virial for my particles
550 
551   if (evflag_atom) fieldforce_peratom();
552 
553   // sum global energy across procs and add in self-energy term
554 
555   const double qscale = qqrd2e * scale;
556 
557   if (eflag_global) {
558     double energy_all;
559     MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
560     energy = energy_all;
561 
562     double e_self = qsqsum*gamma(0.0)/cutoff;
563     energy -= e_self;
564     energy *= 0.5*qscale;
565   }
566 
567   // total long-range virial
568 
569   if (vflag_global && !scalar_pressure_flag) {
570     double virial_all[6];
571     MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
572     for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i];
573   }
574 
575   // fast compute of scalar pressure (if requested)
576 
577   if (scalar_pressure_flag && vflag_global)
578     for (i = 0; i < 3; i++) virial[i] = energy/3.0;
579 
580   // per-atom energy/virial
581   // energy includes self-energy correction
582 
583   if (evflag_atom) {
584     double *q = atom->q;
585     int nlocal = atom->nlocal;
586 
587     if (eflag_atom) {
588       for (i = 0; i < nlocal; i++) {
589         eatom[i] -= q[i]*q[i]*gamma(0.0)/cutoff;
590         eatom[i] *= 0.5*qscale;
591       }
592     }
593 
594     if (vflag_atom) {
595       for (i = 0; i < nlocal; i++)
596         for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
597     }
598   }
599 
600   // convert atoms back from lamda to box coords
601 
602   if (triclinic) domain->lamda2x(atom->nlocal);
603 }
604 
605 /* ----------------------------------------------------------------------
606    allocate memory that depends on # of grid points
607 ------------------------------------------------------------------------- */
608 
allocate()609 void MSM::allocate()
610 {
611   // interpolation coeffs
612 
613   order_allocated = order;
614   memory->create2d_offset(phi1d,3,-order,order,"msm:phi1d");
615   memory->create2d_offset(dphi1d,3,-order,order,"msm:dphi1d");
616 
617   // commgrid using all processors for finest grid level
618 
619   gcall = new GridComm(lmp,world,1,nx_msm[0],ny_msm[0],nz_msm[0],
620                        nxlo_in[0],nxhi_in[0],nylo_in[0],
621                        nyhi_in[0],nzlo_in[0],nzhi_in[0],
622                        nxlo_out_all,nxhi_out_all,nylo_out_all,
623                        nyhi_out_all,nzlo_out_all,nzhi_out_all,
624                        nxlo_out[0],nxhi_out[0],nylo_out[0],
625                        nyhi_out[0],nzlo_out[0],nzhi_out[0]);
626 
627   gcall->setup(ngcall_buf1,ngcall_buf2);
628   npergrid = 1;
629   memory->create(gcall_buf1,npergrid*ngcall_buf1,"msm:gcall_buf1");
630   memory->create(gcall_buf2,npergrid*ngcall_buf2,"msm:gcall_buf2");
631 
632   // allocate memory for each grid level
633 
634   for (int n=0; n<levels; n++) {
635     memory->create3d_offset(qgrid[n],nzlo_out[n],nzhi_out[n],
636             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:qgrid");
637 
638     memory->create3d_offset(egrid[n],nzlo_out[n],nzhi_out[n],
639             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:egrid");
640 
641     // create commgrid object for rho and electric field communication
642 
643     if (active_flag[n]) {
644       delete gc[n];
645       int **procneigh = procneigh_levels[n];
646 
647       gc[n] = new GridComm(lmp,world_levels[n],2,nx_msm[n],ny_msm[n],nz_msm[n],
648                            nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],
649                            nzlo_in[n],nzhi_in[n],
650                            nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],
651                            nzlo_out[n],nzhi_out[n],
652                            procneigh[0][0],procneigh[0][1],procneigh[1][0],
653                            procneigh[1][1],procneigh[2][0],procneigh[2][1]);
654 
655       gc[n]->setup(ngc_buf1[n],ngc_buf2[n]);
656       npergrid = 1;
657       memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"msm:gc_buf1");
658       memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"msm:gc_buf2");
659     } else {
660       delete gc[n];
661       memory->destroy(gc_buf1[n]);
662       memory->destroy(gc_buf2[n]);
663       gc[n] = nullptr;
664       gc_buf1[n] = gc_buf2[n] = nullptr;
665     }
666   }
667 }
668 
669 /* ----------------------------------------------------------------------
670    deallocate memory that depends on # of grid points
671 ------------------------------------------------------------------------- */
672 
deallocate()673 void MSM::deallocate()
674 {
675   memory->destroy2d_offset(phi1d,-order_allocated);
676   memory->destroy2d_offset(dphi1d,-order_allocated);
677 
678   if (gcall) delete gcall;
679   memory->destroy(gcall_buf1);
680   memory->destroy(gcall_buf2);
681   gcall = nullptr;
682   gcall_buf1 = gcall_buf2 = nullptr;
683 }
684 
685 /* ----------------------------------------------------------------------
686    allocate per-atom virial memory that depends on # of grid points
687 ------------------------------------------------------------------------- */
688 
allocate_peratom()689 void MSM::allocate_peratom()
690 {
691   peratom_allocate_flag = 1;
692 
693   // create commgrid object for per-atom virial using all processors
694 
695   npergrid = 6;
696   memory->destroy(gcall_buf1);
697   memory->destroy(gcall_buf2);
698   memory->create(gcall_buf1,npergrid*ngcall_buf1,"pppm:gcall_buf1");
699   memory->create(gcall_buf2,npergrid*ngcall_buf2,"pppm:gcall_buf2");
700 
701   // allocate memory for each grid level
702 
703   for (int n=0; n<levels; n++) {
704     memory->create3d_offset(v0grid[n],nzlo_out[n],nzhi_out[n],
705             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v0grid");
706     memory->create3d_offset(v1grid[n],nzlo_out[n],nzhi_out[n],
707             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v1grid");
708     memory->create3d_offset(v2grid[n],nzlo_out[n],nzhi_out[n],
709             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v2grid");
710     memory->create3d_offset(v3grid[n],nzlo_out[n],nzhi_out[n],
711             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v3grid");
712     memory->create3d_offset(v4grid[n],nzlo_out[n],nzhi_out[n],
713             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v4grid");
714     memory->create3d_offset(v5grid[n],nzlo_out[n],nzhi_out[n],
715             nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v5grid");
716 
717     // create commgrid object for per-atom virial
718 
719     if (active_flag[n]) {
720       npergrid = 6;
721       memory->destroy(gc_buf1[n]);
722       memory->destroy(gc_buf2[n]);
723       memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"pppm:gc_buf1");
724       memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"pppm:gc_buf2");
725     }
726   }
727 }
728 
729 /* ----------------------------------------------------------------------
730    deallocate per-atom virial memory that depends on # of grid points
731 ------------------------------------------------------------------------- */
732 
deallocate_peratom()733 void MSM::deallocate_peratom()
734 {
735   peratom_allocate_flag = 0;
736 
737   for (int n=0; n<levels; n++) {
738     if (v0grid[n])
739       memory->destroy3d_offset(v0grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
740     if (v1grid[n])
741       memory->destroy3d_offset(v1grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
742     if (v2grid[n])
743       memory->destroy3d_offset(v2grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
744     if (v3grid[n])
745       memory->destroy3d_offset(v3grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
746     if (v4grid[n])
747       memory->destroy3d_offset(v4grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
748     if (v5grid[n])
749       memory->destroy3d_offset(v5grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
750   }
751 }
752 
753 /* ----------------------------------------------------------------------
754    allocate memory that depends on # of grid levels
755 ------------------------------------------------------------------------- */
756 
allocate_levels()757 void MSM::allocate_levels()
758 {
759   ngrid = new int[levels];
760 
761   gc = new GridComm*[levels];
762   gc_buf1 = new double*[levels];
763   gc_buf2 = new double*[levels];
764   ngc_buf1 = new int[levels];
765   ngc_buf2 = new int[levels];
766 
767   memory->create(procneigh_levels,levels,3,2,"msm:procneigh_levels");
768   world_levels = new MPI_Comm[levels];
769   active_flag = new int[levels];
770 
771   alpha = new int[levels];
772   betax = new int[levels];
773   betay = new int[levels];
774   betaz = new int[levels];
775 
776   nx_msm = new int[levels];
777   ny_msm = new int[levels];
778   nz_msm = new int[levels];
779 
780   nxlo_in = new int[levels];
781   nylo_in = new int[levels];
782   nzlo_in = new int[levels];
783 
784   nxhi_in = new int[levels];
785   nyhi_in = new int[levels];
786   nzhi_in = new int[levels];
787 
788   nxlo_out = new int[levels];
789   nylo_out = new int[levels];
790   nzlo_out = new int[levels];
791 
792   nxhi_out = new int[levels];
793   nyhi_out = new int[levels];
794   nzhi_out = new int[levels];
795 
796   delxinv = new double[levels];
797   delyinv = new double[levels];
798   delzinv = new double[levels];
799 
800   qgrid = new double***[levels];
801   egrid = new double***[levels];
802 
803   v0grid = new double***[levels];
804   v1grid = new double***[levels];
805   v2grid = new double***[levels];
806   v3grid = new double***[levels];
807   v4grid = new double***[levels];
808   v5grid = new double***[levels];
809 
810   for (int n=0; n<levels; n++) {
811     gc[n] = nullptr;
812 
813     gc_buf1[n] = nullptr;
814     gc_buf2[n] = nullptr;
815 
816     world_levels[n] = MPI_COMM_NULL;
817 
818     qgrid[n] = nullptr;
819     egrid[n] = nullptr;
820 
821     v0grid[n] = nullptr;
822     v1grid[n] = nullptr;
823     v2grid[n] = nullptr;
824     v3grid[n] = nullptr;
825     v4grid[n] = nullptr;
826     v5grid[n] = nullptr;
827   }
828 }
829 
830 /* ----------------------------------------------------------------------
831    deallocate memory that depends on # of grid levels
832 ------------------------------------------------------------------------- */
833 
deallocate_levels()834 void MSM::deallocate_levels()
835 {
836   if (world_levels) {
837     for (int n=0; n < levels; ++n) {
838       if (qgrid[n])
839         memory->destroy3d_offset(qgrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
840 
841       if (egrid[n])
842         memory->destroy3d_offset(egrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
843 
844       if (gc) {
845         if (gc[n]) {
846           delete gc[n];
847           memory->destroy(gc_buf1[n]);
848           memory->destroy(gc_buf2[n]);
849           gc[n] = nullptr;
850           gc_buf1[n] = gc_buf2[n] = nullptr;
851         }
852       }
853 
854       if (world_levels[n] != MPI_COMM_NULL) {
855         MPI_Comm_free(&world_levels[n]);
856       }
857     }
858   }
859 
860   delete [] ngrid;
861   ngrid = nullptr;
862 
863   memory->destroy(procneigh_levels);
864   delete [] world_levels;
865   delete [] active_flag;
866 
867   delete [] gc;
868   delete [] gc_buf1;
869   delete [] gc_buf2;
870   delete [] ngc_buf1;
871   delete [] ngc_buf2;
872 
873   delete [] alpha;
874   delete [] betax;
875   delete [] betay;
876   delete [] betaz;
877 
878   delete [] nx_msm;
879   delete [] ny_msm;
880   delete [] nz_msm;
881 
882   delete [] nxlo_in;
883   delete [] nylo_in;
884   delete [] nzlo_in;
885 
886   delete [] nxhi_in;
887   delete [] nyhi_in;
888   delete [] nzhi_in;
889 
890   delete [] nxlo_out;
891   delete [] nylo_out;
892   delete [] nzlo_out;
893 
894   delete [] nxhi_out;
895   delete [] nyhi_out;
896   delete [] nzhi_out;
897 
898   delete [] delxinv;
899   delete [] delyinv;
900   delete [] delzinv;
901 
902   delete [] qgrid;
903   delete [] egrid;
904 
905   delete [] v0grid;
906   delete [] v1grid;
907   delete [] v2grid;
908   delete [] v3grid;
909   delete [] v4grid;
910   delete [] v5grid;
911 
912   world_levels = nullptr;
913   active_flag = nullptr;
914   gc = nullptr;
915   gc_buf1 = gc_buf2 = nullptr;
916 
917   alpha = nullptr;
918   betax = nullptr;
919   betay = nullptr;
920   betaz = nullptr;
921 
922   nx_msm = nullptr;
923   ny_msm = nullptr;
924   nz_msm = nullptr;
925 
926   nxlo_in = nullptr;
927   nylo_in = nullptr;
928   nzlo_in = nullptr;
929 
930   nxhi_in = nullptr;
931   nyhi_in = nullptr;
932   nzhi_in = nullptr;
933 
934   nxlo_out = nullptr;
935   nylo_out = nullptr;
936   nzlo_out = nullptr;
937 
938   nxhi_out = nullptr;
939   nyhi_out = nullptr;
940   nzhi_out = nullptr;
941 
942   delxinv = nullptr;
943   delyinv = nullptr;
944   delzinv = nullptr;
945 
946   qgrid = nullptr;
947   egrid = nullptr;
948 
949   v0grid = nullptr;
950   v1grid = nullptr;
951   v2grid = nullptr;
952   v3grid = nullptr;
953   v4grid = nullptr;
954   v5grid = nullptr;
955 }
956 
957 /* ----------------------------------------------------------------------
958    set total size of MSM grids
959 ------------------------------------------------------------------------- */
960 
set_grid_global()961 void MSM::set_grid_global()
962 {
963   if (accuracy_relative <= 0.0)
964     error->all(FLERR,"KSpace accuracy must be > 0");
965 
966   double xprd = domain->xprd;
967   double yprd = domain->yprd;
968   double zprd = domain->zprd;
969 
970   int nx_max,ny_max,nz_max;
971   double hx,hy,hz;
972 
973   if (adjust_cutoff_flag && !gridflag) {
974     // seek to choose optimal Coulombic cutoff and number of grid levels
975     //  (based on a cost estimate in Hardy's thesis)
976     int p = order - 1;
977     double hmin = 3072.0*(p+1)/(p-1)/
978       (448.0*MY_PI + 56.0*MY_PI*order/2 + 1701.0);
979     hmin = pow(hmin,1.0/6.0)*pow(xprd*yprd*zprd/atom->natoms,1.0/3.0);
980 
981     nx_max = static_cast<int>(xprd/hmin);
982     ny_max = static_cast<int>(yprd/hmin);
983     nz_max = static_cast<int>(zprd/hmin);
984 
985     nx_max = MAX(nx_max,2);
986     ny_max = MAX(ny_max,2);
987     nz_max = MAX(nz_max,2);
988 
989   } else if (!gridflag) {
990     // Coulombic cutoff is set by user, choose grid to give requested error
991     nx_max = ny_max = nz_max = 2;
992     hx = xprd/nx_max;
993     hy = yprd/ny_max;
994     hz = zprd/nz_max;
995 
996     double x_error = 2.0*accuracy;
997     double y_error = 2.0*accuracy;
998     double z_error = 2.0*accuracy;
999 
1000     while (x_error > accuracy) {
1001       nx_max *= 2;
1002       hx = xprd/nx_max;
1003       x_error = estimate_1d_error(hx,xprd);
1004     }
1005 
1006     while (y_error > accuracy) {
1007       ny_max *= 2;
1008       hy = yprd/ny_max;
1009       y_error = estimate_1d_error(hy,yprd);
1010     }
1011 
1012     while (z_error > accuracy) {
1013       nz_max *= 2;
1014       hz = zprd/nz_max;
1015       z_error = estimate_1d_error(hz,zprd);
1016     }
1017   } else {
1018     // cutoff and grid are set by user
1019     nx_max = nx_msm_max;
1020     ny_max = ny_msm_max;
1021     nz_max = nz_msm_max;
1022   }
1023 
1024   // scale grid for triclinic skew
1025 
1026   if (triclinic && !gridflag) {
1027     double tmp[3];
1028     tmp[0] = nx_max/xprd;
1029     tmp[1] = ny_max/yprd;
1030     tmp[2] = nz_max/zprd;
1031     lamda2xT(&tmp[0],&tmp[0]);
1032     nx_max = static_cast<int>(tmp[0]);
1033     ny_max = static_cast<int>(tmp[1]);
1034     nz_max = static_cast<int>(tmp[2]);
1035   }
1036 
1037   // boost grid size until it is factorable by 2
1038   // round up or down, depending on which is closer
1039 
1040   int flag = 0;
1041   int xlevels,ylevels,zlevels;
1042 
1043   while (!factorable(nx_max,flag,xlevels)) {
1044     double k = log(nx_max)/log(2.0);
1045     double r = k - floor(k);
1046     if (r > 0.5) nx_max++;
1047     else nx_max--;
1048   }
1049   while (!factorable(ny_max,flag,ylevels)) {
1050     double k = log(ny_max)/log(2.0);
1051     double r = k - floor(k);
1052     if (r > 0.5) ny_max++;
1053     else ny_max--;
1054   }
1055   while (!factorable(nz_max,flag,zlevels)) {
1056     double k = log(nz_max)/log(2.0);
1057     double r = k - floor(k);
1058     if (r > 0.5) nz_max++;
1059     else nz_max--;
1060   }
1061 
1062   if (flag && gridflag && me == 0)
1063     error->warning(FLERR,
1064                    "Number of MSM mesh points changed to be a multiple of 2");
1065 
1066   // adjust Coulombic cutoff to give desired error (if requested)
1067 
1068   if (adjust_cutoff_flag) {
1069     hx = xprd/nx_max;
1070     hy = yprd/ny_max;
1071     hz = zprd/nz_max;
1072 
1073     int p = order - 1;
1074     double Lx2 = xprd*xprd;
1075     double Ly2 = yprd*yprd;
1076     double Lz2 = zprd*zprd;
1077     double hx2pm2 = pow(hx,2.0*p-2.0);
1078     double hy2pm2 = pow(hy,2.0*p-2.0);
1079     double hz2pm2 = pow(hz,2.0*p-2.0);
1080     estimate_1d_error(1.0,1.0); // make sure that C_p is defined
1081     double k = q2*C_p/accuracy/sqrt(double(atom->natoms));
1082     double sum = hx2pm2/Lx2 + hy2pm2/Ly2 + hz2pm2/Lz2;
1083 
1084     cutoff = pow(k*k*sum/3.0,1.0/(2.0*p));
1085     int itmp;
1086     double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
1087     *p_cutoff = cutoff;
1088 
1089     if (me == 0)
1090       error->warning(FLERR,"Adjusting Coulombic cutoff for "
1091                      "MSM, new cutoff = {:.8}", cutoff);
1092   }
1093 
1094   if (triclinic == 0) {
1095     h_x = xprd/nx_max;
1096     h_y = yprd/ny_max;
1097     h_z = zprd/nz_max;
1098   } else {
1099     double tmp[3];
1100     tmp[0] = nx_max;
1101     tmp[1] = ny_max;
1102     tmp[2] = nz_max;
1103     x2lamdaT(&tmp[0],&tmp[0]);
1104     h_x = 1.0/tmp[0];
1105     h_y = 1.0/tmp[1];
1106     h_z = 1.0/tmp[2];
1107   }
1108 
1109   // find maximum number of levels
1110 
1111   levels = MAX(xlevels,ylevels);
1112   levels = MAX(levels,zlevels);
1113 
1114   if (levels > MAX_LEVELS) error->all(FLERR,"Too many MSM grid levels");
1115 
1116   // need at least 2 MSM levels for periodic systems
1117 
1118   if (levels <= 1) {
1119     levels = xlevels = ylevels = zlevels = 2;
1120     nx_max = ny_max = nz_max = 2;
1121     if (gridflag)
1122       error->warning(FLERR,
1123              "MSM mesh too small, increasing to 2 points in each direction");
1124   }
1125 
1126   // omit top grid level for periodic systems
1127 
1128   if (!domain->nonperiodic) levels -= 1;
1129 
1130   deallocate_levels();
1131   allocate_levels();
1132 
1133   // find number of grid levels in each direction
1134 
1135   for (int n = 0; n < levels; n++) {
1136 
1137     if (xlevels-n-1 > 0)
1138       nx_msm[n] = static_cast<int> (pow(2.0,xlevels-n-1));
1139     else
1140       nx_msm[n] = 1;
1141 
1142     if (ylevels-n-1 > 0)
1143       ny_msm[n] = static_cast<int> (pow(2.0,ylevels-n-1));
1144     else
1145       ny_msm[n] = 1;
1146 
1147     if (zlevels-n-1 > 0)
1148       nz_msm[n] = static_cast<int> (pow(2.0,zlevels-n-1));
1149     else
1150       nz_msm[n] = 1;
1151   }
1152 
1153   if (nx_msm[0] >= OFFSET || ny_msm[0] >= OFFSET || nz_msm[0] >= OFFSET)
1154     error->all(FLERR,"MSM grid is too large");
1155 
1156   // compute number of extra grid points needed for non-periodic boundary conditions
1157 
1158   if (domain->nonperiodic) {
1159     alpha[0] = -(order/2 - 1);
1160     betax[0] = nx_msm[0] + (order/2 - 1);
1161     betay[0] = ny_msm[0] + (order/2 - 1);
1162     betaz[0] = nz_msm[0] + (order/2 - 1);
1163     for (int n = 1; n < levels; n++) {
1164       alpha[n] = -((-alpha[n-1]+1)/2) - (order/2 - 1);
1165       betax[n] = ((betax[n-1]+1)/2) + (order/2 - 1);
1166       betay[n] = ((betay[n-1]+1)/2) + (order/2 - 1);
1167       betaz[n] = ((betaz[n-1]+1)/2) + (order/2 - 1);
1168     }
1169   }
1170 
1171   if (domain->nonperiodic) {
1172     alpha[0] = -(order/2 - 1);
1173     betax[0] = nx_msm[0] + (order/2 - 1);
1174     betay[0] = ny_msm[0] + (order/2 - 1);
1175     betaz[0] = nz_msm[0] + (order/2 - 1);
1176     for (int n = 1; n < levels; n++) {
1177       alpha[n] = -((-alpha[n-1]+1)/2) - (order/2 - 1);
1178       betax[n] = ((betax[n-1]+1)/2) + (order/2 - 1);
1179       betay[n] = ((betay[n-1]+1)/2) + (order/2 - 1);
1180       betaz[n] = ((betaz[n-1]+1)/2) + (order/2 - 1);
1181     }
1182   }
1183 
1184 }
1185 
1186 /* ----------------------------------------------------------------------
1187    set local subset of MSM grid that I own
1188    n xyz lo/hi in = 3d grid that I own (inclusive)
1189    n xyz lo/hi out = 3d grid + ghost cells in 6 directions (inclusive)
1190 ------------------------------------------------------------------------- */
1191 
set_grid_local()1192 void MSM::set_grid_local()
1193 {
1194   // loop over grid levels
1195 
1196   for (int n=0; n<levels; n++) {
1197 
1198     // partition global grid across procs
1199     // n xyz lo/hi in[] = lower/upper bounds of global grid this proc owns
1200     // indices range from 0 to N-1 inclusive in each dim
1201 
1202     comm->partition_grid(nx_msm[n],ny_msm[n],nz_msm[n],0.0,
1203                          nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],
1204                          nzlo_in[n],nzhi_in[n]);
1205 
1206     nlower = -(order-1)/2;
1207     nupper = order/2;
1208 
1209     // lengths of box and processor sub-domains
1210 
1211     double *prd,*sublo,*subhi;
1212 
1213     if (!triclinic) {
1214       prd = domain->prd;
1215       sublo = domain->sublo;
1216       subhi = domain->subhi;
1217     } else {
1218       prd = domain->prd_lamda;
1219       sublo = domain->sublo_lamda;
1220       subhi = domain->subhi_lamda;
1221     }
1222 
1223     double xprd = prd[0];
1224     double yprd = prd[1];
1225     double zprd = prd[2];
1226 
1227     // shift values for particle <-> grid mapping
1228     // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
1229 
1230     // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
1231     //   global MSM grid that my particles can contribute charge to
1232     // effectively nlo_in,nhi_in + ghost cells
1233     // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
1234     //           position a particle in my box can be at
1235     // dist[3] = particle position bound = subbox + skin/2.0
1236     // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
1237 
1238     double dist[3];
1239     double cuthalf = 0.0;
1240     if (n == 0) cuthalf = 0.5*neighbor->skin; // only applies to finest grid
1241     dist[0] = dist[1] = dist[2] = cuthalf;
1242     if (triclinic) kspacebbox(cuthalf,&dist[0]);
1243 
1244     int nlo,nhi;
1245 
1246     nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
1247                             nx_msm[n]/xprd + OFFSET) - OFFSET;
1248     nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
1249                             nx_msm[n]/xprd + OFFSET) - OFFSET;
1250     if (n == 0) {
1251       // use a smaller ghost region for interpolation
1252       nxlo_out_all = nlo + nlower;
1253       nxhi_out_all = nhi + nupper;
1254     }
1255     // a larger ghost region is needed for the direct sum and for restriction/prolongation
1256     nxlo_out[n] = nlo + MIN(-order,nxlo_direct);
1257     nxhi_out[n] = nhi + MAX(order,nxhi_direct);
1258 
1259     nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
1260                             ny_msm[n]/yprd + OFFSET) - OFFSET;
1261     nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
1262                             ny_msm[n]/yprd + OFFSET) - OFFSET;
1263     if (n == 0) {
1264       nylo_out_all = nlo + nlower;
1265       nyhi_out_all = nhi + nupper;
1266     }
1267     nylo_out[n] = nlo + MIN(-order,nylo_direct);
1268     nyhi_out[n] = nhi + MAX(order,nyhi_direct);
1269 
1270     nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
1271                             nz_msm[n]/zprd + OFFSET) - OFFSET;
1272     nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
1273                             nz_msm[n]/zprd + OFFSET) - OFFSET;
1274     if (n == 0) {
1275       nzlo_out_all = nlo + nlower;
1276       nzhi_out_all = nhi + nupper;
1277     }
1278     // a hemisphere is used for direct sum interactions,
1279     //   so no ghosting is needed for direct sum in the -z direction
1280     nzlo_out[n] = nlo - order;
1281     nzhi_out[n] = nhi + MAX(order,nzhi_direct);
1282 
1283     // add extra grid points for non-periodic boundary conditions
1284     // skip reset of lo/hi for procs who do not own any grid cells
1285 
1286     if (domain->nonperiodic) {
1287 
1288       if (!domain->xperiodic && nxlo_in[n] <= nxhi_in[n]) {
1289         if (nxlo_in[n] == 0) nxlo_in[n] = alpha[n];
1290         nxlo_out[n] = MAX(nxlo_out[n],alpha[n]);
1291         if (n == 0) nxlo_out_all = MAX(nxlo_out_all,alpha[0]);
1292 
1293         if (nxhi_in[n] == nx_msm[n] - 1) nxhi_in[n] = betax[n];
1294         nxhi_out[n] = MIN(nxhi_out[n],betax[n]);
1295         if (n == 0) nxhi_out_all = MIN(nxhi_out_all,betax[0]);
1296       }
1297 
1298       if (!domain->yperiodic && nylo_in[n] <= nyhi_in[n]) {
1299         if (nylo_in[n] == 0) nylo_in[n] = alpha[n];
1300         nylo_out[n] = MAX(nylo_out[n],alpha[n]);
1301         if (n == 0) nylo_out_all = MAX(nylo_out_all,alpha[0]);
1302 
1303         if (nyhi_in[n] == ny_msm[n] - 1) nyhi_in[n] = betay[n];
1304         nyhi_out[n] = MIN(nyhi_out[n],betay[n]);
1305         if (n == 0) nyhi_out_all = MIN(nyhi_out_all,betay[0]);
1306       }
1307 
1308       if (!domain->zperiodic && nzlo_in[n] <= nzhi_in[n]) {
1309         if (nzlo_in[n] == 0) nzlo_in[n] = alpha[n];
1310         nzlo_out[n] = MAX(nzlo_out[n],alpha[n]);
1311         if (n == 0) nzlo_out_all = MAX(nzlo_out_all,alpha[0]);
1312 
1313         if (nzhi_in[n] == nz_msm[n] - 1) nzhi_in[n] = betaz[n];
1314         nzhi_out[n] = MIN(nzhi_out[n],betaz[n]);
1315         if (n == 0) nzhi_out_all = MIN(nzhi_out_all,betaz[0]);
1316       }
1317     }
1318 
1319     // prevent inactive processors from participating in MPI communication routines
1320 
1321     set_proc_grid(n);
1322 
1323     // MSM grids for this proc, including ghosts
1324 
1325     ngrid[n] = (nxhi_out[n]-nxlo_out[n]+1) * (nyhi_out[n]-nylo_out[n]+1) *
1326       (nzhi_out[n]-nzlo_out[n]+1);
1327   }
1328 }
1329 
1330 /* ----------------------------------------------------------------------
1331    find active procs and neighbor procs for each grid level
1332 ------------------------------------------------------------------------- */
1333 
set_proc_grid(int n)1334 void MSM::set_proc_grid(int n)
1335 {
1336   for (int i=0; i<3; i++)
1337     myloc[i] = comm->myloc[i];
1338 
1339   // size of inner MSM grid owned by this proc
1340 
1341   int nxgrid_in = nxhi_in[n]-nxlo_in[n]+1;
1342   int nygrid_in = nyhi_in[n]-nylo_in[n]+1;
1343   int nzgrid_in = nzhi_in[n]-nzlo_in[n]+1;
1344   int ngrid_in = nxgrid_in * nygrid_in * nzgrid_in;
1345 
1346   // check to see if this proc owns any inner grid points on this grid level
1347   //  if not, deactivate by setting active_flag = 0
1348 
1349   int cnt[3];
1350 
1351   cnt[0] = 0;
1352   if (myloc[1] == 0 && myloc[2] == 0)
1353     if (nxgrid_in > 0)
1354       cnt[0] = 1;
1355 
1356   cnt[1] = 0;
1357   if (myloc[0] == 0 && myloc[2] == 0)
1358     if (nygrid_in > 0)
1359       cnt[1] = 1;
1360 
1361   cnt[2] = 0;
1362   if (myloc[0] == 0 && myloc[1] == 0)
1363     if (nzgrid_in > 0)
1364       cnt[2] = 1;
1365 
1366   MPI_Allreduce(&cnt[0],&procgrid[0],3,MPI_INT,MPI_SUM,world);
1367 
1368   int color;
1369 
1370   if (ngrid_in > 0) {
1371     active_flag[n] = 1;
1372     color = 0;
1373   } else {
1374     active_flag[n] = 0;
1375     color = MPI_UNDEFINED;
1376   }
1377 
1378   // define a new MPI communicator for this grid level that only includes active procs
1379 
1380   if (world_levels[n] != MPI_COMM_NULL) MPI_Comm_free(&world_levels[n]);
1381   MPI_Comm_split(world,color,me,&world_levels[n]);
1382 
1383   if (!active_flag[n]) return;
1384 
1385   int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
1386 
1387   // map processor IDs to new 3d processor grid
1388   // produces myloc, procneigh
1389 
1390   int periods[3];
1391   periods[0] = periods[1] = periods[2] = 1;
1392   MPI_Comm cartesian;
1393 
1394   MPI_Cart_create(world_levels[n],3,procgrid,periods,0,&cartesian);
1395   MPI_Cart_get(cartesian,3,procgrid,periods,myloc);
1396   MPI_Cart_shift(cartesian,0,1,&procneigh[0][0],&procneigh[0][1]);
1397   MPI_Cart_shift(cartesian,1,1,&procneigh[1][0],&procneigh[1][1]);
1398   MPI_Cart_shift(cartesian,2,1,&procneigh[2][0],&procneigh[2][1]);
1399 
1400   MPI_Comm_free(&cartesian);
1401 
1402   for (int i=0; i<3; i++)
1403     for (int j=0; j<2; j++)
1404       procneigh_levels[n][i][j] = procneigh[i][j];
1405 }
1406 
1407 /* ----------------------------------------------------------------------
1408    reset local grid arrays and communication stencils
1409    called by fix balance b/c it changed sizes of processor sub-domains
1410 ------------------------------------------------------------------------- */
1411 
setup_grid()1412 void MSM::setup_grid()
1413 {
1414   // free all arrays previously allocated
1415   // pre-compute volume-dependent coeffs
1416   // reset portion of global grid that each proc owns
1417   // reallocate MSM long-range dependent memory
1418   // don't invoke allocate_peratom(), compute() will allocate when needed
1419 
1420   setup();
1421 }
1422 
1423 /* ----------------------------------------------------------------------
1424    check if all factors of n are in list of factors
1425    return 1 if yes, 0 if no
1426 ------------------------------------------------------------------------- */
1427 
factorable(int n,int & flag,int & nlevels)1428 int MSM::factorable(int n, int &flag, int &nlevels)
1429 {
1430   int i;
1431   nlevels = 1;
1432 
1433   while (n > 1) {
1434     for (i = 0; i < nfactors; i++) {
1435       if (n % factors[i] == 0) {
1436         n /= factors[i];
1437         nlevels++;
1438         break;
1439       }
1440     }
1441     if (i == nfactors) {
1442       flag = 1;
1443       return 0;
1444     }
1445   }
1446 
1447   return 1;
1448 }
1449 
1450 /* ----------------------------------------------------------------------
1451    find center grid pt for each of my particles
1452    check that full stencil for the particle will fit in my 3d brick
1453    store central grid pt indices in part2grid array
1454 ------------------------------------------------------------------------- */
1455 
particle_map()1456 void MSM::particle_map()
1457 {
1458   int nx,ny,nz;
1459 
1460   double **x = atom->x;
1461   int nlocal = atom->nlocal;
1462 
1463   int flag = 0;
1464 
1465   if (!std::isfinite(boxlo[0]) || !std::isfinite(boxlo[1]) || !std::isfinite(boxlo[2]))
1466     error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
1467 
1468   for (int i = 0; i < nlocal; i++) {
1469 
1470     // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
1471     // current particle coord can be outside global and local box
1472     // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
1473 
1474     nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv[0]+OFFSET) - OFFSET;
1475     ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv[0]+OFFSET) - OFFSET;
1476     nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv[0]+OFFSET) - OFFSET;
1477 
1478     part2grid[i][0] = nx;
1479     part2grid[i][1] = ny;
1480     part2grid[i][2] = nz;
1481 
1482     // check that entire stencil around nx,ny,nz will fit in my 3d brick
1483 
1484     if (nx+nlower < nxlo_out[0] || nx+nupper > nxhi_out[0] ||
1485         ny+nlower < nylo_out[0] || ny+nupper > nyhi_out[0] ||
1486         nz+nlower < nzlo_out[0] || nz+nupper > nzhi_out[0]) flag = 1;
1487   }
1488 
1489   if (flag) error->one(FLERR,"Out of range atoms - cannot compute MSM");
1490 }
1491 
1492 /* ----------------------------------------------------------------------
1493    aninterpolation: interpolate charges from particles to grid
1494 ------------------------------------------------------------------------- */
1495 
make_rho()1496 void MSM::make_rho()
1497 {
1498   int i,l,m,n,nx,ny,nz,mx,my,mz;
1499   double dx,dy,dz,x0,y0,z0;
1500 
1501   // clear 3d density array
1502 
1503   double ***qgrid0 = qgrid[0];
1504   memset(&(qgrid0[nzlo_out[0]][nylo_out[0]][nxlo_out[0]]),0,ngrid[0]*sizeof(double));
1505 
1506   // loop over my charges, add their contribution to nearby grid points
1507   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
1508   // (dx,dy,dz) = distance to "lower left" grid pt
1509   // (mx,my,mz) = global coords of moving stencil pt
1510 
1511   double *q = atom->q;
1512   double **x = atom->x;
1513   int nlocal = atom->nlocal;
1514 
1515   for (i = 0; i < nlocal; i++) {
1516 
1517     nx = part2grid[i][0];
1518     ny = part2grid[i][1];
1519     nz = part2grid[i][2];
1520     dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
1521     dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
1522     dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
1523 
1524     compute_phis(dx,dy,dz);
1525 
1526     z0 = q[i];
1527     for (n = nlower; n <= nupper; n++) {
1528       mz = n+nz;
1529       y0 = z0*phi1d[2][n];
1530       for (m = nlower; m <= nupper; m++) {
1531         my = m+ny;
1532         x0 = y0*phi1d[1][m];
1533         for (l = nlower; l <= nupper; l++) {
1534           mx = l+nx;
1535           qgrid0[mz][my][mx] += x0*phi1d[0][l];
1536         }
1537       }
1538     }
1539   }
1540 
1541 }
1542 
1543 /* ----------------------------------------------------------------------
1544    MSM direct sum procedure for intermediate grid levels, solve Poisson's
1545    equation to get energy, virial, etc.
1546 ------------------------------------------------------------------------- */
1547 
direct(int n)1548 void MSM::direct(int n)
1549 {
1550   double ***qgridn = qgrid[n];
1551   double ***egridn = egrid[n];
1552   double ***v0gridn = v0grid[n];
1553   double ***v1gridn = v1grid[n];
1554   double ***v2gridn = v2grid[n];
1555   double ***v3gridn = v3grid[n];
1556   double ***v4gridn = v4grid[n];
1557   double ***v5gridn = v5grid[n];
1558   double *g_directn = g_direct[n];
1559   double *v0_directn = v0_direct[n];
1560   double *v1_directn = v1_direct[n];
1561   double *v2_directn = v2_direct[n];
1562   double *v3_directn = v3_direct[n];
1563   double *v4_directn = v4_direct[n];
1564   double *v5_directn = v5_direct[n];
1565 
1566   // zero out electric potential
1567 
1568   memset(&(egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1569 
1570   // zero out virial
1571 
1572   if (vflag_atom) {
1573     memset(&(v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1574     memset(&(v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1575     memset(&(v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1576     memset(&(v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1577     memset(&(v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1578     memset(&(v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1579   }
1580 
1581   int icx,icy,icz,ix,iy,iz,zk,zyk,k;
1582   int ii,jj,kk;
1583   int imin,imax,jmin,jmax,kmax;
1584   double qtmp,qtmp2,gtmp;
1585   double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
1586   double **qk,**ek;
1587   double *qkj,*ekj;
1588 
1589   int nx = nxhi_direct - nxlo_direct + 1;
1590   int ny = nyhi_direct - nylo_direct + 1;
1591 
1592   // loop over inner grid points
1593 
1594   for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
1595 
1596     if (domain->zperiodic) {
1597       kmax = nzhi_direct;
1598     } else {
1599       kmax = MIN(nzhi_direct,betaz[n] - icz);
1600     }
1601 
1602     for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
1603 
1604       if (domain->yperiodic) {
1605         jmin = nylo_direct;
1606         jmax = nyhi_direct;
1607       } else {
1608         jmin = MAX(nylo_direct,alpha[n] - icy);
1609         jmax = MIN(nyhi_direct,betay[n] - icy);
1610       }
1611 
1612       for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
1613 
1614         if (domain->xperiodic) {
1615           imin = nxlo_direct;
1616           imax = nxhi_direct;
1617         } else {
1618           imin = MAX(nxlo_direct,alpha[n] - icx);
1619           imax = MIN(nxhi_direct,betax[n] - icx);
1620         }
1621 
1622         qtmp = qgridn[icz][icy][icx]; // charge on center grid point
1623 
1624         esum = 0.0;
1625         if (vflag_either && !scalar_pressure_flag)
1626           v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0;
1627 
1628         // use hemisphere to avoid double computation of pair-wise
1629         //   interactions in direct sum (no computations in -z direction)
1630 
1631         for (iz = 1; iz <= kmax; iz++) {
1632           kk = icz+iz;
1633           qk = qgridn[kk];
1634           ek = egridn[kk];
1635           zk = (iz + nzhi_direct)*ny;
1636           for (iy = jmin; iy <= jmax; iy++) {
1637             jj = icy+iy;
1638             qkj = qk[jj];
1639             ekj = ek[jj];
1640             zyk = (zk + iy + nyhi_direct)*nx;
1641             for (ix = imin; ix <= imax; ix++) {
1642               ii = icx+ix;
1643               qtmp2 = qkj[ii]; // charge on outer grid point
1644               k = zyk + ix + nxhi_direct;
1645               gtmp = g_directn[k];
1646               esum += gtmp * qtmp2;
1647               ekj[ii] += gtmp * qtmp;
1648 
1649               if (vflag_either && !scalar_pressure_flag) {
1650                 v0sum += v0_directn[k] * qtmp2;
1651                 v1sum += v1_directn[k] * qtmp2;
1652                 v2sum += v2_directn[k] * qtmp2;
1653                 v3sum += v3_directn[k] * qtmp2;
1654                 v4sum += v4_directn[k] * qtmp2;
1655                 v5sum += v5_directn[k] * qtmp2;
1656               }
1657             }
1658           }
1659         }
1660 
1661         // iz=0
1662 
1663         iz = 0;
1664         kk = icz+iz;
1665         qk = qgridn[kk];
1666         ek = egridn[kk];
1667         zk = (iz + nzhi_direct)*ny;
1668         for (iy = 1; iy <= jmax; iy++) {
1669           jj = icy+iy;
1670           qkj = qk[jj];
1671           ekj = ek[jj];
1672           zyk = (zk + iy + nyhi_direct)*nx;
1673           for (ix = imin; ix <= imax; ix++) {
1674             ii = icx+ix;
1675             qtmp2 = qkj[ii];
1676             k = zyk + ix + nxhi_direct;
1677             gtmp = g_directn[k];
1678             esum += gtmp * qtmp2;
1679             ekj[ii] += gtmp * qtmp;
1680 
1681             if (vflag_either && !scalar_pressure_flag) {
1682               v0sum += v0_directn[k] * qtmp2;
1683               v1sum += v1_directn[k] * qtmp2;
1684               v2sum += v2_directn[k] * qtmp2;
1685               v3sum += v3_directn[k] * qtmp2;
1686               v4sum += v4_directn[k] * qtmp2;
1687               v5sum += v5_directn[k] * qtmp2;
1688             }
1689           }
1690         }
1691 
1692         // iz=0, iy=0
1693 
1694         iz = 0;
1695         kk = icz+iz;
1696         qk = qgridn[kk];
1697         ek = egridn[kk];
1698         zk = (iz + nzhi_direct)*ny;
1699         iy = 0;
1700         jj = icy+iy;
1701         qkj = qk[jj];
1702         ekj = ek[jj];
1703         zyk = (zk + iy + nyhi_direct)*nx;
1704         for (ix = 1; ix <= imax; ix++) {
1705           ii = icx+ix;
1706           qtmp2 = qkj[ii];
1707           k = zyk + ix + nxhi_direct;
1708           gtmp = g_directn[k];
1709           esum += gtmp * qtmp2;
1710           ekj[ii] += gtmp * qtmp;
1711 
1712           if (vflag_either && !scalar_pressure_flag) {
1713             v0sum += v0_directn[k] * qtmp2;
1714             v1sum += v1_directn[k] * qtmp2;
1715             v2sum += v2_directn[k] * qtmp2;
1716             v3sum += v3_directn[k] * qtmp2;
1717             v4sum += v4_directn[k] * qtmp2;
1718             v5sum += v5_directn[k] * qtmp2;
1719           }
1720         }
1721 
1722         // iz=0, iy=0, ix=0
1723 
1724         iz = 0;
1725         zk = (iz + nzhi_direct)*ny;
1726         iy = 0;
1727         zyk = (zk + iy + nyhi_direct)*nx;
1728         ix = 0;
1729         k = zyk + ix + nxhi_direct;
1730         gtmp = g_directn[k];
1731         esum += 0.5 * gtmp * qtmp;
1732         egridn[icz][icy][icx] += 0.5 * gtmp * qtmp;
1733 
1734         // virial is zero for iz=0, iy=0, ix=0
1735 
1736         // accumulate per-atom energy/virial
1737 
1738         egridn[icz][icy][icx] += esum;
1739 
1740         if (vflag_atom && !scalar_pressure_flag) {
1741           v0gridn[icz][icy][icx] += v0sum;
1742           v1gridn[icz][icy][icx] += v1sum;
1743           v2gridn[icz][icy][icx] += v2sum;
1744           v3gridn[icz][icy][icx] += v3sum;
1745           v4gridn[icz][icy][icx] += v4sum;
1746           v5gridn[icz][icy][icx] += v5sum;
1747         }
1748 
1749         // accumulate total energy/virial
1750 
1751         if (evflag) {
1752           qtmp = qgridn[icz][icy][icx];
1753           if (eflag_global) energy += 2.0 * esum * qtmp;
1754           if (vflag_global && !scalar_pressure_flag) {
1755             virial[0] += 2.0 * v0sum * qtmp;
1756             virial[1] += 2.0 * v1sum * qtmp;
1757             virial[2] += 2.0 * v2sum * qtmp;
1758             virial[3] += 2.0 * v3sum * qtmp;
1759             virial[4] += 2.0 * v4sum * qtmp;
1760             virial[5] += 2.0 * v5sum * qtmp;
1761           }
1762         }
1763 
1764       }
1765     }
1766   }
1767 
1768   // compute per-atom virial (if requested)
1769 
1770   if (vflag_atom)
1771     direct_peratom(n);
1772 }
1773 
1774 /* ----------------------------------------------------------------------
1775    MSM direct sum procedure for intermediate grid levels, solve Poisson's
1776    equation to get per-atom virial, separate method used for performance
1777    reasons
1778 ------------------------------------------------------------------------- */
1779 
direct_peratom(int n)1780 void MSM::direct_peratom(int n)
1781 {
1782   double ***qgridn = qgrid[n];
1783   double ***v0gridn = v0grid[n];
1784   double ***v1gridn = v1grid[n];
1785   double ***v2gridn = v2grid[n];
1786   double ***v3gridn = v3grid[n];
1787   double ***v4gridn = v4grid[n];
1788   double ***v5gridn = v5grid[n];
1789 
1790   int icx,icy,icz,ix,iy,iz,zk,zyk,k;
1791   int ii,jj,kk;
1792   int imin,imax,jmin,jmax,kmax;
1793   double qtmp;
1794 
1795   int nx = nxhi_direct - nxlo_direct + 1;
1796   int ny = nyhi_direct - nylo_direct + 1;
1797 
1798   // loop over inner grid points
1799 
1800   for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
1801 
1802     if (domain->zperiodic) {
1803       kmax = nzhi_direct;
1804     } else {
1805       kmax = MIN(nzhi_direct,betaz[n] - icz);
1806     }
1807 
1808     for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
1809 
1810       if (domain->yperiodic) {
1811         jmin = nylo_direct;
1812         jmax = nyhi_direct;
1813       } else {
1814         jmin = MAX(nylo_direct,alpha[n] - icy);
1815         jmax = MIN(nyhi_direct,betay[n] - icy);
1816       }
1817 
1818       for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
1819 
1820         if (domain->xperiodic) {
1821           imin = nxlo_direct;
1822           imax = nxhi_direct;
1823         } else {
1824           imin = MAX(nxlo_direct,alpha[n] - icx);
1825           imax = MIN(nxhi_direct,betax[n] - icx);
1826         }
1827 
1828         qtmp = qgridn[icz][icy][icx]; // center grid point
1829 
1830         // use hemisphere to avoid double computation of pair-wise
1831         //   interactions in direct sum (no computations in -z direction)
1832 
1833         for (iz = 1; iz <= kmax; iz++) {
1834           kk = icz+iz;
1835           zk = (iz + nzhi_direct)*ny;
1836           for (iy = jmin; iy <= jmax; iy++) {
1837             jj = icy+iy;
1838             zyk = (zk + iy + nyhi_direct)*nx;
1839             for (ix = imin; ix <= imax; ix++) {
1840               ii = icx+ix;
1841               k = zyk + ix + nxhi_direct;
1842               v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp;
1843               v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp;
1844               v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp;
1845               v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp;
1846               v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp;
1847               v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp;
1848             }
1849           }
1850         }
1851 
1852         // iz=0
1853 
1854         iz = 0;
1855         kk = icz+iz;
1856         zk = (iz + nzhi_direct)*ny;
1857         for (iy = 1; iy <= jmax; iy++) {
1858           jj = icy+iy;
1859           zyk = (zk + iy + nyhi_direct)*nx;
1860           for (ix = imin; ix <= imax; ix++) {
1861             ii = icx+ix;
1862             k = zyk + ix + nxhi_direct;
1863             v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp;
1864             v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp;
1865             v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp;
1866             v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp;
1867             v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp;
1868             v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp;
1869           }
1870         }
1871 
1872         // iz=0, iy=0
1873 
1874         iz = 0;
1875         kk = icz+iz;
1876         zk = (iz + nzhi_direct)*ny;
1877         iy = 0;
1878         jj = icy+iy;
1879         zyk = (zk + iy + nyhi_direct)*nx;
1880         for (ix = 1; ix <= imax; ix++) {
1881           ii = icx+ix;
1882           k = zyk + ix + nxhi_direct;
1883           v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp;
1884           v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp;
1885           v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp;
1886           v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp;
1887           v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp;
1888           v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp;
1889        }
1890 
1891         // virial is zero for iz=0, iy=0, ix=0
1892 
1893       }
1894     }
1895   }
1896 }
1897 
1898 /* ----------------------------------------------------------------------
1899    MSM direct sum procedure for top grid level (nonperiodic systems only)
1900 ------------------------------------------------------------------------- */
1901 
direct_top(int n)1902 void MSM::direct_top(int n)
1903 {
1904   double ***qgridn = qgrid[n];
1905   double ***egridn = egrid[n];
1906   double ***v0gridn = v0grid[n];
1907   double ***v1gridn = v1grid[n];
1908   double ***v2gridn = v2grid[n];
1909   double ***v3gridn = v3grid[n];
1910   double ***v4gridn = v4grid[n];
1911   double ***v5gridn = v5grid[n];
1912 
1913   // zero out electric potential
1914 
1915   memset(&(egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1916 
1917   // zero out virial
1918 
1919   if (vflag_atom) {
1920     memset(&(v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1921     memset(&(v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1922     memset(&(v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1923     memset(&(v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1924     memset(&(v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1925     memset(&(v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1926   }
1927 
1928   int icx,icy,icz,ix,iy,iz,zk,zyk,k;
1929   int ii,jj,kk;
1930   int imin,imax,jmin,jmax,kmax;
1931   double qtmp,qtmp2,gtmp;
1932   double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
1933   double **qk,**ek;
1934   double *qkj,*ekj;
1935 
1936   int nx_top = betax[n] - alpha[n];
1937   int ny_top = betay[n] - alpha[n];
1938   int nz_top = betaz[n] - alpha[n];
1939 
1940   int nx = 2*nx_top + 1;
1941   int ny = 2*ny_top + 1;
1942 
1943   // loop over inner grid points
1944 
1945   for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
1946 
1947     if (domain->zperiodic) {
1948       kmax = nz_msm[n]-1;
1949     } else {
1950       kmax = betaz[n] - icz;
1951     }
1952 
1953     for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
1954 
1955       if (domain->yperiodic) {
1956         jmin = 0;
1957         jmax = ny_msm[n]-1;
1958       } else {
1959         jmin = alpha[n] - icy;
1960         jmax = betay[n] - icy;
1961       }
1962 
1963       for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
1964 
1965         if (domain->xperiodic) {
1966           imin = 0;
1967           imax = nx_msm[n]-1;
1968         } else {
1969           imin = alpha[n] - icx;
1970           imax = betax[n] - icx;
1971         }
1972 
1973         qtmp = qgridn[icz][icy][icx];
1974 
1975         esum = 0.0;
1976         if (vflag_either && !scalar_pressure_flag)
1977           v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0;
1978 
1979         // use hemisphere to avoid double computation of pair-wise
1980         //   interactions in direct sum (no computations in -z direction)
1981 
1982         for (iz = 1; iz <= kmax; iz++) {
1983           kk = icz+iz;
1984           qk = qgridn[kk];
1985           ek = egridn[kk];
1986           zk = (iz + nz_top)*ny;
1987           for (iy = jmin; iy <= jmax; iy++) {
1988             jj = icy+iy;
1989             qkj = qk[jj];
1990             ekj = ek[jj];
1991             zyk = (zk + iy + ny_top)*nx;
1992             for (ix = imin; ix <= imax; ix++) {
1993               ii = icx+ix;
1994               qtmp2 = qkj[ii];
1995               k = zyk + ix + nx_top;
1996               gtmp = g_direct_top[k];
1997               esum += gtmp * qtmp2;
1998               ekj[ii] += gtmp * qtmp;
1999 
2000               if (vflag_either && !scalar_pressure_flag) {
2001                 v0sum += v0_direct_top[k] * qtmp2;
2002                 v1sum += v1_direct_top[k] * qtmp2;
2003                 v2sum += v2_direct_top[k] * qtmp2;
2004                 v3sum += v3_direct_top[k] * qtmp2;
2005                 v4sum += v4_direct_top[k] * qtmp2;
2006                 v5sum += v5_direct_top[k] * qtmp2;
2007               }
2008             }
2009           }
2010         }
2011 
2012         // iz=0
2013 
2014         iz = 0;
2015         kk = icz+iz;
2016         qk = qgridn[kk];
2017         ek = egridn[kk];
2018         zk = (iz + nz_top)*ny;
2019         for (iy = 1; iy <= jmax; iy++) {
2020           jj = icy+iy;
2021           qkj = qk[jj];
2022           ekj = ek[jj];
2023           zyk = (zk + iy + ny_top)*nx;
2024           for (ix = imin; ix <= imax; ix++) {
2025             ii = icx+ix;
2026             qtmp2 = qkj[ii];
2027             k = zyk + ix + nx_top;
2028             gtmp = g_direct_top[k];
2029             esum += gtmp * qtmp2;
2030             ekj[ii] += gtmp * qtmp;
2031 
2032             if (vflag_either && !scalar_pressure_flag) {
2033               v0sum += v0_direct_top[k] * qtmp2;
2034               v1sum += v1_direct_top[k] * qtmp2;
2035               v2sum += v2_direct_top[k] * qtmp2;
2036               v3sum += v3_direct_top[k] * qtmp2;
2037               v4sum += v4_direct_top[k] * qtmp2;
2038               v5sum += v5_direct_top[k] * qtmp2;
2039             }
2040           }
2041         }
2042 
2043         // iz=0, iy=0
2044 
2045         iz = 0;
2046         kk = icz+iz;
2047         qk = qgridn[kk];
2048         ek = egridn[kk];
2049         zk = (iz + nz_top)*ny;
2050         iy = 0;
2051         jj = icy+iy;
2052         qkj = qk[jj];
2053         ekj = ek[jj];
2054         zyk = (zk + iy + ny_top)*nx;
2055         for (ix = 1; ix <= imax; ix++) {
2056           ii = icx+ix;
2057           qtmp2 = qkj[ii];
2058           k = zyk + ix + nx_top;
2059           gtmp = g_direct_top[k];
2060           esum += gtmp * qtmp2;
2061           ekj[ii] += gtmp * qtmp;
2062 
2063           if (vflag_either && !scalar_pressure_flag) {
2064             v0sum += v0_direct_top[k] * qtmp2;
2065             v1sum += v1_direct_top[k] * qtmp2;
2066             v2sum += v2_direct_top[k] * qtmp2;
2067             v3sum += v3_direct_top[k] * qtmp2;
2068             v4sum += v4_direct_top[k] * qtmp2;
2069             v5sum += v5_direct_top[k] * qtmp2;
2070           }
2071         }
2072 
2073         // iz=0, iy=0, ix=0
2074 
2075         iz = 0;
2076         zk = (iz + nz_top)*ny;
2077         iy = 0;
2078         zyk = (zk + iy + ny_top)*nx;
2079         ix = 0;
2080         ii = icx+ix;
2081         k = zyk + ix + nx_top;
2082         gtmp = g_direct_top[k];
2083         esum += 0.5 * gtmp * qtmp;
2084         egridn[icz][icy][icx] += 0.5 * gtmp * qtmp;
2085 
2086         if (vflag_either && !scalar_pressure_flag) {
2087           v0sum += v0_direct_top[k] * qtmp;
2088           v1sum += v1_direct_top[k] * qtmp;
2089           v2sum += v2_direct_top[k] * qtmp;
2090           v3sum += v3_direct_top[k] * qtmp;
2091           v4sum += v4_direct_top[k] * qtmp;
2092           v5sum += v5_direct_top[k] * qtmp;
2093         }
2094 
2095         // accumulate per-atom energy/virial
2096 
2097         egridn[icz][icy][icx] += esum;
2098 
2099         if (vflag_atom && !scalar_pressure_flag) {
2100           v0gridn[icz][icy][icx] += v0sum;
2101           v1gridn[icz][icy][icx] += v1sum;
2102           v2gridn[icz][icy][icx] += v2sum;
2103           v3gridn[icz][icy][icx] += v3sum;
2104           v4gridn[icz][icy][icx] += v4sum;
2105           v5gridn[icz][icy][icx] += v5sum;
2106         }
2107 
2108         // accumulate total energy/virial
2109 
2110         if (evflag) {
2111           qtmp = qgridn[icz][icy][icx];
2112           if (eflag_global) energy += 2.0 * esum * qtmp;
2113           if (vflag_global && !scalar_pressure_flag) {
2114             virial[0] += 2.0 * v0sum * qtmp;
2115             virial[1] += 2.0 * v1sum * qtmp;
2116             virial[2] += 2.0 * v2sum * qtmp;
2117             virial[3] += 2.0 * v3sum * qtmp;
2118             virial[4] += 2.0 * v4sum * qtmp;
2119             virial[5] += 2.0 * v5sum * qtmp;
2120           }
2121         }
2122 
2123       }
2124     }
2125   }
2126 
2127   // compute per-atom virial (if requested)
2128 
2129   if (vflag_atom)
2130     direct_peratom_top(n);
2131 }
2132 /* ----------------------------------------------------------------------
2133    MSM direct sum procedure for top grid level, solve Poisson's
2134    equation to get per-atom virial, separate method used for performance
2135    reasons
2136 ------------------------------------------------------------------------- */
2137 
direct_peratom_top(int n)2138 void MSM::direct_peratom_top(int n)
2139 {
2140   double ***qgridn = qgrid[n];
2141   double ***v0gridn = v0grid[n];
2142   double ***v1gridn = v1grid[n];
2143   double ***v2gridn = v2grid[n];
2144   double ***v3gridn = v3grid[n];
2145   double ***v4gridn = v4grid[n];
2146   double ***v5gridn = v5grid[n];
2147 
2148   int icx,icy,icz,ix,iy,iz,zk,zyk,k;
2149   int ii,jj,kk;
2150   int imin,imax,jmin,jmax,kmax;
2151   double qtmp;
2152 
2153   int nx_top = betax[n] - alpha[n];
2154   int ny_top = betay[n] - alpha[n];
2155   int nz_top = betaz[n] - alpha[n];
2156 
2157   int nx = 2*nx_top + 1;
2158   int ny = 2*ny_top + 1;
2159 
2160   // loop over inner grid points
2161 
2162   for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
2163 
2164     if (domain->zperiodic) {
2165       kmax = nz_msm[n]-1;
2166     } else {
2167       kmax = betaz[n] - icz;
2168     }
2169 
2170     for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
2171 
2172       if (domain->yperiodic) {
2173         jmin = 0;
2174         jmax = ny_msm[n]-1;
2175       } else {
2176         jmin = alpha[n] - icy;
2177         jmax = betay[n] - icy;
2178       }
2179 
2180       for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
2181 
2182         if (domain->xperiodic) {
2183           imin = 0;
2184           imax = nx_msm[n]-1;
2185         } else {
2186           imin = alpha[n] - icx;
2187           imax = betax[n] - icx;
2188         }
2189 
2190         qtmp = qgridn[icz][icy][icx]; // center grid point
2191 
2192         // use hemisphere to avoid double computation of pair-wise
2193         //   interactions in direct sum (no computations in -z direction)
2194 
2195         for (iz = 1; iz <= kmax; iz++) {
2196           kk = icz+iz;
2197           zk = (iz + nz_top)*ny;
2198           for (iy = jmin; iy <= jmax; iy++) {
2199             jj = icy+iy;
2200             zyk = (zk + iy + ny_top)*nx;
2201             for (ix = imin; ix <= imax; ix++) {
2202               ii = icx+ix;
2203               k = zyk + ix + nx_top;
2204               v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp;
2205               v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp;
2206               v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp;
2207               v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp;
2208               v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp;
2209               v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp;
2210             }
2211           }
2212         }
2213 
2214         // iz=0
2215 
2216         iz = 0;
2217         kk = icz+iz;
2218         zk = (iz + nz_top)*ny;
2219         for (iy = 1; iy <= jmax; iy++) {
2220           jj = icy+iy;
2221           zyk = (zk + iy + ny_top)*nx;
2222           for (ix = imin; ix <= imax; ix++) {
2223             ii = icx+ix;
2224             k = zyk + ix + nx_top;
2225             v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp;
2226             v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp;
2227             v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp;
2228             v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp;
2229             v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp;
2230             v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp;
2231           }
2232         }
2233 
2234         // iz=0, iy=0
2235 
2236         iz = 0;
2237         kk = icz+iz;
2238         zk = (iz + nz_top)*ny;
2239         iy = 0;
2240         jj = icy+iy;
2241         zyk = (zk + iy + ny_top)*nx;
2242         for (ix = 1; ix <= imax; ix++) {
2243           ii = icx+ix;
2244           k = zyk + ix + nx_top;
2245           v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp;
2246           v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp;
2247           v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp;
2248           v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp;
2249           v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp;
2250           v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp;
2251        }
2252 
2253         // virial is zero for iz=0, iy=0, ix=0
2254 
2255       }
2256     }
2257   }
2258 }
2259 
2260 /* ----------------------------------------------------------------------
2261    MSM restriction procedure for intermediate grid levels, interpolate
2262    charges from finer grid to coarser grid
2263 ------------------------------------------------------------------------- */
2264 
restriction(int n)2265 void MSM::restriction(int n)
2266 {
2267   const int p = order-1;
2268 
2269   double ***qgrid1 = qgrid[n];
2270   double ***qgrid2 = qgrid[n+1];
2271 
2272   int k = 0;
2273   int *index = new int[p+2];
2274   for (int nu=-p; nu<=p; nu++) {
2275     if (nu%2 == 0 && nu != 0) continue;
2276     phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
2277     phi1d[1][k] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
2278     phi1d[2][k] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
2279     index[k] = nu;
2280     k++;
2281   }
2282 
2283   int ip,jp,kp,ic,jc,kc,i,j;
2284   int ii,jj,kk;
2285   double phiz,phizy,q2sum;
2286 
2287   // zero out charge on coarser grid
2288 
2289   memset(&(qgrid2[nzlo_out[n+1]][nylo_out[n+1]][nxlo_out[n+1]]),0,ngrid[n+1]*sizeof(double));
2290 
2291   for (kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
2292     for (jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
2293       for (ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {
2294 
2295         ic = ip * static_cast<int> (delxinv[n]/delxinv[n+1]);
2296         jc = jp * static_cast<int> (delyinv[n]/delyinv[n+1]);
2297         kc = kp * static_cast<int> (delzinv[n]/delzinv[n+1]);
2298 
2299         q2sum = 0.0;
2300 
2301         for (k=0; k<=p+1; k++) {
2302           kk = kc+index[k];
2303           if (!domain->zperiodic) {
2304             if (kk < alpha[n]) continue;
2305             if (kk > betaz[n]) break;
2306           }
2307           phiz = phi1d[2][k];
2308           for (j=0; j<=p+1; j++) {
2309             jj = jc+index[j];
2310             if (!domain->yperiodic) {
2311               if (jj < alpha[n]) continue;
2312               if (jj > betay[n]) break;
2313             }
2314             phizy = phi1d[1][j]*phiz;
2315             for (i=0; i<=p+1; i++) {
2316               ii = ic+index[i];
2317               if (!domain->xperiodic) {
2318                 if (ii < alpha[n]) continue;
2319                 if (ii > betax[n]) break;
2320               }
2321               q2sum += qgrid1[kk][jj][ii] *
2322                 phi1d[0][i]*phizy;
2323             }
2324           }
2325         }
2326         qgrid2[kp][jp][ip] += q2sum;
2327       }
2328   delete[] index;
2329 }
2330 
2331 /* ----------------------------------------------------------------------
2332    MSM prolongation procedure for intermediate grid levels, interpolate
2333    per-atom energy/virial from coarser grid to finer grid
2334 ------------------------------------------------------------------------- */
2335 
prolongation(int n)2336 void MSM::prolongation(int n)
2337 {
2338   const int p = order-1;
2339 
2340   double ***egrid1 = egrid[n];
2341   double ***egrid2 = egrid[n+1];
2342 
2343   double ***v0grid1 = v0grid[n];
2344   double ***v0grid2 = v0grid[n+1];
2345   double ***v1grid1 = v1grid[n];
2346   double ***v1grid2 = v1grid[n+1];
2347   double ***v2grid1 = v2grid[n];
2348   double ***v2grid2 = v2grid[n+1];
2349   double ***v3grid1 = v3grid[n];
2350   double ***v3grid2 = v3grid[n+1];
2351   double ***v4grid1 = v4grid[n];
2352   double ***v4grid2 = v4grid[n+1];
2353   double ***v5grid1 = v5grid[n];
2354   double ***v5grid2 = v5grid[n+1];
2355 
2356   int k = 0;
2357   int *index = new int[p+2];
2358   for (int nu=-p; nu<=p; nu++) {
2359     if (nu%2 == 0 && nu != 0) continue;
2360     phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
2361     phi1d[1][k] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
2362     phi1d[2][k] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
2363     index[k] = nu;
2364     k++;
2365   }
2366 
2367   int ip,jp,kp,ic,jc,kc,i,j;
2368   int ii,jj,kk;
2369   double phiz,phizy,phi3d;
2370   double etmp2,v0tmp2,v1tmp2,v2tmp2,v3tmp2,v4tmp2,v5tmp2;
2371 
2372   for (kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
2373     for (jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
2374       for (ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {
2375 
2376         ic = ip * static_cast<int> (delxinv[n]/delxinv[n+1]);
2377         jc = jp * static_cast<int> (delyinv[n]/delyinv[n+1]);
2378         kc = kp * static_cast<int> (delzinv[n]/delzinv[n+1]);
2379 
2380         etmp2 = egrid2[kp][jp][ip];
2381 
2382         if (vflag_atom) {
2383           v0tmp2 = v0grid2[kp][jp][ip];
2384           v1tmp2 = v1grid2[kp][jp][ip];
2385           v2tmp2 = v2grid2[kp][jp][ip];
2386           v3tmp2 = v3grid2[kp][jp][ip];
2387           v4tmp2 = v4grid2[kp][jp][ip];
2388           v5tmp2 = v5grid2[kp][jp][ip];
2389         }
2390 
2391         for (k=0; k<=p+1; k++) {
2392           kk = kc+index[k];
2393           if (!domain->zperiodic) {
2394             if (kk < alpha[n]) continue;
2395             if (kk > betaz[n]) break;
2396           }
2397           phiz = phi1d[2][k];
2398           for (j=0; j<=p+1; j++) {
2399             jj = jc+index[j];
2400             if (!domain->yperiodic) {
2401               if (jj < alpha[n]) continue;
2402               if (jj > betay[n]) break;
2403             }
2404             phizy = phi1d[1][j]*phiz;
2405             for (i=0; i<=p+1; i++) {
2406               ii = ic+index[i];
2407               if (!domain->xperiodic) {
2408                 if (ii < alpha[n]) continue;
2409                 if (ii > betax[n]) break;
2410               }
2411               phi3d = phi1d[0][i]*phizy;
2412 
2413               egrid1[kk][jj][ii] += etmp2 * phi3d;
2414 
2415               if (vflag_atom) {
2416                 v0grid1[kk][jj][ii] += v0tmp2 * phi3d;
2417                 v1grid1[kk][jj][ii] += v1tmp2 * phi3d;
2418                 v2grid1[kk][jj][ii] += v2tmp2 * phi3d;
2419                 v3grid1[kk][jj][ii] += v3tmp2 * phi3d;
2420                 v4grid1[kk][jj][ii] += v4tmp2 * phi3d;
2421                 v5grid1[kk][jj][ii] += v5tmp2 * phi3d;
2422               }
2423 
2424             }
2425           }
2426         }
2427 
2428       }
2429   delete[] index;
2430 }
2431 
2432 /* ----------------------------------------------------------------------
2433    Use MPI_Allreduce to fill ghost grid values, for coarse grids this may
2434    be cheaper than using nearest-neighbor communication (commgrid), right
2435    now only works for periodic boundary conditions
2436 ------------------------------------------------------------------------- */
2437 
grid_swap_forward(int n,double *** & gridn)2438 void MSM::grid_swap_forward(int n, double*** &gridn)
2439 {
2440   double ***gridn_tmp;
2441   memory->create(gridn_tmp,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_tmp");
2442 
2443   double ***gridn_all;
2444   memory->create(gridn_all,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_all");
2445 
2446   int ngrid_in = nx_msm[n] * ny_msm[n] * nz_msm[n];
2447 
2448   memset(&(gridn_tmp[0][0][0]),0,ngrid_in*sizeof(double));
2449   memset(&(gridn_all[0][0][0]),0,ngrid_in*sizeof(double));
2450 
2451   // copy inner grid cell values from gridn into gridn_tmp
2452 
2453   int icx,icy,icz;
2454 
2455   for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++)
2456     for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++)
2457       for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++)
2458         gridn_tmp[icz][icy][icx] = gridn[icz][icy][icx];
2459 
2460   MPI_Allreduce(&(gridn_tmp[0][0][0]),
2461                 &(gridn_all[0][0][0]),
2462                 ngrid_in,MPI_DOUBLE,MPI_SUM,world_levels[n]);
2463 
2464   // bitmask for PBCs (only works for power of 2 numbers)
2465 
2466   int PBCx,PBCy,PBCz;
2467 
2468   PBCx = nx_msm[n]-1;
2469   PBCy = ny_msm[n]-1;
2470   PBCz = nz_msm[n]-1;
2471 
2472   // copy from gridn_all into gridn to fill ghost grid cell values
2473 
2474   for (icz = nzlo_out[n]; icz <= nzhi_out[n]; icz++)
2475     for (icy = nylo_out[n]; icy <= nyhi_out[n]; icy++)
2476       for (icx = nxlo_out[n]; icx <= nxhi_out[n]; icx++)
2477         gridn[icz][icy][icx] = gridn_all[icz&PBCz][icy&PBCy][icx&PBCx];
2478 
2479   memory->destroy(gridn_tmp);
2480   memory->destroy(gridn_all);
2481 }
2482 
2483 /* ----------------------------------------------------------------------
2484    Use MPI_Allreduce to get contribution from ghost grid cells, for coarse
2485    grids this may be cheaper than using nearest-neighbor communication
2486    (commgrid), right now only works for periodic boundary conditions
2487 ------------------------------------------------------------------------- */
2488 
grid_swap_reverse(int n,double *** & gridn)2489 void MSM::grid_swap_reverse(int n, double*** &gridn)
2490 {
2491   double ***gridn_tmp;
2492   memory->create(gridn_tmp,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_tmp");
2493 
2494   double ***gridn_all;
2495   memory->create(gridn_all,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_all");
2496 
2497   int ngrid_in = nx_msm[n] * ny_msm[n] * nz_msm[n];
2498 
2499   memset(&(gridn_tmp[0][0][0]),0,ngrid_in*sizeof(double));
2500   memset(&(gridn_all[0][0][0]),0,ngrid_in*sizeof(double));
2501 
2502   // bitmask for PBCs (only works for power of 2 numbers)
2503 
2504   int icx,icy,icz;
2505   int PBCx,PBCy,PBCz;
2506 
2507   PBCx = nx_msm[n]-1;
2508   PBCy = ny_msm[n]-1;
2509   PBCz = nz_msm[n]-1;
2510 
2511   // copy ghost grid cell values from gridn into inner portion of gridn_tmp
2512 
2513   for (icz = nzlo_out[n]; icz <= nzhi_out[n]; icz++)
2514     for (icy = nylo_out[n]; icy <= nyhi_out[n]; icy++)
2515       for (icx = nxlo_out[n]; icx <= nxhi_out[n]; icx++)
2516         gridn_tmp[icz&PBCz][icy&PBCy][icx&PBCx] += gridn[icz][icy][icx];
2517 
2518   MPI_Allreduce(&(gridn_tmp[0][0][0]),
2519                 &(gridn_all[0][0][0]),
2520                 ngrid_in,MPI_DOUBLE,MPI_SUM,world_levels[n]);
2521 
2522   // copy inner grid cell values from gridn_all into gridn
2523 
2524   for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++)
2525     for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++)
2526       for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++)
2527         gridn[icz][icy][icx] = gridn_all[icz][icy][icx];
2528 
2529   memory->destroy(gridn_tmp);
2530   memory->destroy(gridn_all);
2531 }
2532 
2533 /* ----------------------------------------------------------------------
2534    pack own values to buf to send to another proc (used by commgrid)
2535 ------------------------------------------------------------------------- */
2536 
pack_forward_grid(int flag,void * vbuf,int nlist,int * list)2537 void MSM::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
2538 {
2539   double *buf = (double *) vbuf;
2540 
2541   int n = current_level;
2542   int k = 0;
2543 
2544   if (flag == FORWARD_RHO) {
2545     double ***qgridn = qgrid[n];
2546     double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2547     for (int i = 0; i < nlist; i++) {
2548       buf[k++] = qsrc[list[i]];
2549     }
2550   } else if (flag == FORWARD_AD) {
2551     double ***egridn = egrid[n];
2552     double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2553     for (int i = 0; i < nlist; i++)
2554       buf[i] = src[list[i]];
2555   } else if (flag == FORWARD_AD_PERATOM) {
2556     double ***v0gridn = v0grid[n];
2557     double ***v1gridn = v1grid[n];
2558     double ***v2gridn = v2grid[n];
2559     double ***v3gridn = v3grid[n];
2560     double ***v4gridn = v4grid[n];
2561     double ***v5gridn = v5grid[n];
2562     double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2563     double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2564     double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2565     double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2566     double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2567     double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2568     for (int i = 0; i < nlist; i++) {
2569       buf[k++] = v0src[list[i]];
2570       buf[k++] = v1src[list[i]];
2571       buf[k++] = v2src[list[i]];
2572       buf[k++] = v3src[list[i]];
2573       buf[k++] = v4src[list[i]];
2574       buf[k++] = v5src[list[i]];
2575     }
2576   }
2577 }
2578 
2579 /* ----------------------------------------------------------------------
2580    unpack another proc's own values from buf and set own ghost values
2581 ------------------------------------------------------------------------- */
2582 
unpack_forward_grid(int flag,void * vbuf,int nlist,int * list)2583 void MSM::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
2584 {
2585   double *buf = (double *) vbuf;
2586 
2587   int n = current_level;
2588   int k = 0;
2589 
2590   if (flag == FORWARD_RHO) {
2591   double ***qgridn = qgrid[n];
2592     double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2593     for (int i = 0; i < nlist; i++) {
2594       dest[list[i]] = buf[k++];
2595     }
2596   } else if (flag == FORWARD_AD) {
2597     double ***egridn = egrid[n];
2598     double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2599     for (int i = 0; i < nlist; i++)
2600       dest[list[i]] = buf[k++];
2601   } else if (flag == FORWARD_AD_PERATOM) {
2602     double ***v0gridn = v0grid[n];
2603     double ***v1gridn = v1grid[n];
2604     double ***v2gridn = v2grid[n];
2605     double ***v3gridn = v3grid[n];
2606     double ***v4gridn = v4grid[n];
2607     double ***v5gridn = v5grid[n];
2608     double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2609     double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2610     double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2611     double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2612     double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2613     double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2614     for (int i = 0; i < nlist; i++) {
2615       v0src[list[i]] = buf[k++];
2616       v1src[list[i]] = buf[k++];
2617       v2src[list[i]] = buf[k++];
2618       v3src[list[i]] = buf[k++];
2619       v4src[list[i]] = buf[k++];
2620       v5src[list[i]] = buf[k++];
2621     }
2622   }
2623 }
2624 
2625 /* ----------------------------------------------------------------------
2626    pack ghost values into buf to send to another proc
2627 ------------------------------------------------------------------------- */
2628 
pack_reverse_grid(int flag,void * vbuf,int nlist,int * list)2629 void MSM::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
2630 {
2631   double *buf = (double *) vbuf;
2632 
2633   int n = current_level;
2634   int k = 0;
2635 
2636   if (flag == REVERSE_RHO) {
2637     double ***qgridn = qgrid[n];
2638     double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2639     for (int i = 0; i < nlist; i++) {
2640       buf[k++] = qsrc[list[i]];
2641     }
2642   } else if (flag == REVERSE_AD) {
2643     double ***egridn = egrid[n];
2644     double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2645     for (int i = 0; i < nlist; i++)
2646       buf[i] = src[list[i]];
2647   } else if (flag == REVERSE_AD_PERATOM) {
2648     double ***v0gridn = v0grid[n];
2649     double ***v1gridn = v1grid[n];
2650     double ***v2gridn = v2grid[n];
2651     double ***v3gridn = v3grid[n];
2652     double ***v4gridn = v4grid[n];
2653     double ***v5gridn = v5grid[n];
2654     double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2655     double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2656     double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2657     double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2658     double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2659     double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2660     for (int i = 0; i < nlist; i++) {
2661       buf[k++] = v0src[list[i]];
2662       buf[k++] = v1src[list[i]];
2663       buf[k++] = v2src[list[i]];
2664       buf[k++] = v3src[list[i]];
2665       buf[k++] = v4src[list[i]];
2666       buf[k++] = v5src[list[i]];
2667     }
2668   }
2669 }
2670 
2671 /* ----------------------------------------------------------------------
2672    unpack another proc's ghost values from buf and add to own values
2673 ------------------------------------------------------------------------- */
2674 
unpack_reverse_grid(int flag,void * vbuf,int nlist,int * list)2675 void MSM::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
2676 {
2677   double *buf = (double *) vbuf;
2678 
2679   int n = current_level;
2680   int k = 0;
2681 
2682   if (flag == REVERSE_RHO) {
2683     double ***qgridn = qgrid[n];
2684     double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2685     for (int i = 0; i < nlist; i++) {
2686       dest[list[i]] += buf[k++];
2687     }
2688   } else if (flag == REVERSE_AD) {
2689     double ***egridn = egrid[n];
2690     double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2691     for (int i = 0; i < nlist; i++)
2692       dest[list[i]] += buf[k++];
2693   } else if (flag == REVERSE_AD_PERATOM) {
2694     double ***v0gridn = v0grid[n];
2695     double ***v1gridn = v1grid[n];
2696     double ***v2gridn = v2grid[n];
2697     double ***v3gridn = v3grid[n];
2698     double ***v4gridn = v4grid[n];
2699     double ***v5gridn = v5grid[n];
2700     double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2701     double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2702     double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2703     double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2704     double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2705     double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2706     for (int i = 0; i < nlist; i++) {
2707       v0src[list[i]] += buf[k++];
2708       v1src[list[i]] += buf[k++];
2709       v2src[list[i]] += buf[k++];
2710       v3src[list[i]] += buf[k++];
2711       v4src[list[i]] += buf[k++];
2712       v5src[list[i]] += buf[k++];
2713     }
2714   }
2715 }
2716 
2717 /* ----------------------------------------------------------------------
2718    interpolate from grid to get force on my particles
2719 ------------------------------------------------------------------------- */
2720 
fieldforce()2721 void MSM::fieldforce()
2722 {
2723   double ***egridn = egrid[0];
2724 
2725   int i,l,m,n,nx,ny,nz,mx,my,mz;
2726   double dx,dy,dz;
2727   double phi_x,phi_y,phi_z;
2728   double dphi_x,dphi_y,dphi_z;
2729   double ekx,eky,ekz,etmp;
2730 
2731 
2732   // loop over my charges, interpolate electric field from nearby grid points
2733   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
2734   // (dx,dy,dz) = distance to "lower left" grid pt
2735   // (mx,my,mz) = global coords of moving stencil pt
2736   // ek = 3 components of E-field on particle
2737 
2738   double *q = atom->q;
2739   double **x = atom->x;
2740   double **f = atom->f;
2741 
2742   int nlocal = atom->nlocal;
2743 
2744   for (i = 0; i < nlocal; i++) {
2745     nx = part2grid[i][0];
2746     ny = part2grid[i][1];
2747     nz = part2grid[i][2];
2748     dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
2749     dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
2750     dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
2751 
2752     compute_phis_and_dphis(dx,dy,dz);
2753 
2754     ekx = eky = ekz = 0.0;
2755     for (n = nlower; n <= nupper; n++) {
2756       mz = n+nz;
2757       phi_z = phi1d[2][n];
2758       dphi_z = dphi1d[2][n];
2759       for (m = nlower; m <= nupper; m++) {
2760         my = m+ny;
2761         phi_y = phi1d[1][m];
2762         dphi_y = dphi1d[1][m];
2763         for (l = nlower; l <= nupper; l++) {
2764           mx = l+nx;
2765           phi_x = phi1d[0][l];
2766           dphi_x = dphi1d[0][l];
2767           etmp = egridn[mz][my][mx];
2768           ekx += dphi_x*phi_y*phi_z*etmp;
2769           eky += phi_x*dphi_y*phi_z*etmp;
2770           ekz += phi_x*phi_y*dphi_z*etmp;
2771         }
2772       }
2773     }
2774 
2775     ekx *= delxinv[0];
2776     eky *= delyinv[0];
2777     ekz *= delzinv[0];
2778 
2779     // effectively divide by length for a triclinic system
2780 
2781     if (triclinic) {
2782       double tmp[3];
2783       tmp[0] = ekx;
2784       tmp[1] = eky;
2785       tmp[2] = ekz;
2786       x2lamdaT(&tmp[0],&tmp[0]);
2787       ekx = tmp[0];
2788       eky = tmp[1];
2789       ekz = tmp[2];
2790     }
2791 
2792     // convert E-field to force
2793 
2794     const double qfactor = qqrd2e*scale*q[i];
2795     f[i][0] += qfactor*ekx;
2796     f[i][1] += qfactor*eky;
2797     f[i][2] += qfactor*ekz;
2798   }
2799 }
2800 
2801 /* ----------------------------------------------------------------------
2802    interpolate from grid to get per-atom energy/virial
2803 ------------------------------------------------------------------------- */
2804 
fieldforce_peratom()2805 void MSM::fieldforce_peratom()
2806 {
2807   int i,l,m,n,nx,ny,nz,mx,my,mz;
2808   double dx,dy,dz,x0,y0,z0;
2809   double u,v0,v1,v2,v3,v4,v5;
2810 
2811   double ***egridn = egrid[0];
2812 
2813   double ***v0gridn = v0grid[0];
2814   double ***v1gridn = v1grid[0];
2815   double ***v2gridn = v2grid[0];
2816   double ***v3gridn = v3grid[0];
2817   double ***v4gridn = v4grid[0];
2818   double ***v5gridn = v5grid[0];
2819 
2820   // loop over my charges, interpolate from nearby grid points
2821   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
2822   // (dx,dy,dz) = distance to "lower left" grid pt
2823   // (mx,my,mz) = global coords of moving stencil pt
2824 
2825   double *q = atom->q;
2826   double **x = atom->x;
2827 
2828   int nlocal = atom->nlocal;
2829 
2830   for (i = 0; i < nlocal; i++) {
2831     nx = part2grid[i][0];
2832     ny = part2grid[i][1];
2833     nz = part2grid[i][2];
2834     dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
2835     dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
2836     dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
2837 
2838     compute_phis_and_dphis(dx,dy,dz);
2839 
2840     u = v0 = v1 = v2 = v3 = v4 = v5 = 0.0;
2841     for (n = nlower; n <= nupper; n++) {
2842       mz = n+nz;
2843       z0 = phi1d[2][n];
2844       for (m = nlower; m <= nupper; m++) {
2845         my = m+ny;
2846         y0 = z0*phi1d[1][m];
2847         for (l = nlower; l <= nupper; l++) {
2848           mx = l+nx;
2849           x0 = y0*phi1d[0][l];
2850           if (eflag_atom) u += x0*egridn[mz][my][mx];
2851           if (vflag_atom) {
2852             v0 += x0*v0gridn[mz][my][mx];
2853             v1 += x0*v1gridn[mz][my][mx];
2854             v2 += x0*v2gridn[mz][my][mx];
2855             v3 += x0*v3gridn[mz][my][mx];
2856             v4 += x0*v4gridn[mz][my][mx];
2857             v5 += x0*v5gridn[mz][my][mx];
2858           }
2859         }
2860       }
2861     }
2862 
2863     if (eflag_atom) eatom[i] += q[i]*u;
2864     if (vflag_atom) {
2865       vatom[i][0] += q[i]*v0;
2866       vatom[i][1] += q[i]*v1;
2867       vatom[i][2] += q[i]*v2;
2868       vatom[i][3] += q[i]*v3;
2869       vatom[i][4] += q[i]*v4;
2870       vatom[i][5] += q[i]*v5;
2871     }
2872   }
2873 }
2874 
2875 /* ----------------------------------------------------------------------
2876    charge assignment into phi1d (interpolation coefficients)
2877 ------------------------------------------------------------------------- */
2878 
compute_phis(const double & dx,const double & dy,const double & dz)2879 void MSM::compute_phis(const double &dx, const double &dy,
2880                                  const double &dz)
2881 {
2882   double delx,dely,delz;
2883 
2884   for (int nu = nlower; nu <= nupper; nu++) {
2885     delx = dx + double(nu);
2886     dely = dy + double(nu);
2887     delz = dz + double(nu);
2888 
2889     phi1d[0][nu] = compute_phi(delx);
2890     phi1d[1][nu] = compute_phi(dely);
2891     phi1d[2][nu] = compute_phi(delz);
2892   }
2893 }
2894 
2895 /* ----------------------------------------------------------------------
2896    charge assignment into phi1d and dphi1d (interpolation coefficients)
2897 ------------------------------------------------------------------------- */
2898 
compute_phis_and_dphis(const double & dx,const double & dy,const double & dz)2899 void MSM::compute_phis_and_dphis(const double &dx, const double &dy,
2900                                  const double &dz)
2901 {
2902   double delx,dely,delz;
2903 
2904   for (int nu = nlower; nu <= nupper; nu++) {
2905     delx = dx + double(nu);
2906     dely = dy + double(nu);
2907     delz = dz + double(nu);
2908 
2909     phi1d[0][nu] = compute_phi(delx);
2910     phi1d[1][nu] = compute_phi(dely);
2911     phi1d[2][nu] = compute_phi(delz);
2912     dphi1d[0][nu] = compute_dphi(delx);
2913     dphi1d[1][nu] = compute_dphi(dely);
2914     dphi1d[2][nu] = compute_dphi(delz);
2915   }
2916 }
2917 
2918 /* ----------------------------------------------------------------------
2919    compute phi using interpolating polynomial
2920    see Eq 7 from Parallel Computing 35 (2009) 164-177
2921    and Hardy's thesis
2922 ------------------------------------------------------------------------- */
2923 
compute_phi(const double & xi)2924 inline double MSM::compute_phi(const double &xi)
2925 {
2926   double phi = 0.0;
2927   double abs_xi = fabs(xi);
2928   double xi2 = xi*xi;
2929 
2930   if (order == 4) {
2931     if (abs_xi <= 1) {
2932       phi = (1.0 - abs_xi)*(1.0 + abs_xi - 1.5*xi2);
2933     } else if (abs_xi <= 2) {
2934       phi = -0.5*(abs_xi - 1.0)*(2.0 - abs_xi)*(2.0 - abs_xi);
2935     } else {
2936       phi = 0.0;
2937     }
2938 
2939   } else if (order == 6) {
2940     if (abs_xi <= 1) {
2941       phi = (1.0 - xi2)*(2.0 - abs_xi)*(6.0 + 3.0*abs_xi -
2942         5.0*xi2)/12.0;
2943     } else if (abs_xi <= 2) {
2944       phi = -(abs_xi - 1.0)*(2.0 - abs_xi)*(3.0 - abs_xi)*
2945         (4.0 + 9.0*abs_xi - 5.0*xi2)/24.0;
2946     } else if (abs_xi <= 3) {
2947       phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*
2948         (3.0 - abs_xi)*(4.0 - abs_xi)/24.0;
2949     } else {
2950       phi = 0.0;
2951     }
2952 
2953   } else if (order == 8) {
2954     if (abs_xi <= 1) {
2955       phi = (1.0 - xi2)*(4.0 - xi2)*(3.0 - abs_xi)*
2956         (12.0 + 4.0*abs_xi - 7.0*xi2)/144.0;
2957     } else if (abs_xi <= 2) {
2958       phi = -(xi2 - 1.0)*(2.0 - abs_xi)*(3.0 - abs_xi)*
2959         (4.0 - abs_xi)*(10.0 + 12.0*abs_xi - 7.0*xi2)/240.0;
2960     } else if (abs_xi <= 3) {
2961       phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*(4.0 - abs_xi)*
2962         (5.0 - abs_xi)*(6.0 + 20.0*abs_xi - 7.0*xi2)/720.0;
2963     } else if (abs_xi <= 4) {
2964       phi = -(abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*(4.0 - abs_xi)*
2965         (4.0 - abs_xi)*(5.0 - abs_xi)*(6.0 - abs_xi)/720.0;
2966     } else {
2967       phi = 0.0;
2968     }
2969 
2970   } else if (order == 10) {
2971     if (abs_xi <= 1) {
2972       phi = (1.0 - xi2)*(4.0 - xi2)*(9.0 - xi2)*
2973         (4.0 - abs_xi)*(20.0 + 5.0*abs_xi - 9.0*xi2)/2880.0;
2974     } else if (abs_xi <= 2) {
2975       phi = -(xi2 - 1.0)*(4.0 - xi2)*(3.0 - abs_xi)*(4.0 - abs_xi)*
2976         (5.0 - abs_xi)*(6.0 + 5.0*abs_xi - 3.0*xi2)/1440.0;
2977     } else if (abs_xi <= 3) {
2978       phi = (xi2 - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*(4.0 - abs_xi)*
2979         (5.0 - abs_xi)*(6.0 - abs_xi)*(14.0 + 25.0*abs_xi - 9.0*xi2)/10080.0;
2980     } else if (abs_xi <= 4) {
2981       phi = -(abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*(4.0 - abs_xi)*
2982         (5.0 - abs_xi)*(6.0 - abs_xi)*(7.0 - abs_xi)*
2983         (8.0 + 35.0*abs_xi - 9.0*xi2)/40320.0;
2984     } else if (abs_xi <= 5) {
2985       phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*
2986         (abs_xi - 4.0)*(5.0 - abs_xi)*(5.0 - abs_xi)*(6.0 - abs_xi)*
2987         (7.0 - abs_xi)*(8.0 - abs_xi)/40320.0;
2988     } else {
2989       phi = 0.0;
2990     }
2991   }
2992 
2993   return phi;
2994 }
2995 
2996 /* ----------------------------------------------------------------------
2997    compute the derivative of phi
2998    phi is an interpolating polynomial
2999    see Eq 7 from Parallel Computing 35 (2009) 164-177
3000    and Hardy's thesis
3001 ------------------------------------------------------------------------- */
3002 
compute_dphi(const double & xi)3003 inline double MSM::compute_dphi(const double &xi)
3004 {
3005   double dphi = 0.0;
3006   double abs_xi = fabs(xi);
3007 
3008   if (order == 4) {
3009     double xi2 = xi*xi;
3010     double abs_xi2 = abs_xi*abs_xi;
3011     if (abs_xi == 0.0) {
3012       dphi = 0.0;
3013     } else if (abs_xi <= 1) {
3014       dphi = xi*(3*xi2 + 6*abs_xi2 - 10*abs_xi)/2.0/abs_xi;
3015     } else if (abs_xi <= 2) {
3016       dphi = xi*(2 - abs_xi)*(3*abs_xi - 4)/2.0/abs_xi;
3017     } else {
3018       dphi = 0.0;
3019     }
3020 
3021   } else if (order == 6) {
3022     double xi2 = xi*xi;
3023     double xi4 = xi2*xi2;
3024     double abs_xi2 = abs_xi*abs_xi;
3025     double abs_xi3 = abs_xi2*abs_xi;
3026     double abs_xi4 = abs_xi2*abs_xi2;
3027     if (abs_xi == 0.0) {
3028       dphi = 0.0;
3029     } else if (abs_xi <= 1) {
3030       dphi = xi*(46*xi2*abs_xi - 20*xi2*abs_xi2 - 5*xi4 + 5*xi2 +
3031         6*abs_xi3 + 10*abs_xi2 - 50*abs_xi)/12.0/abs_xi;
3032     } else if (abs_xi <= 2) {
3033       dphi = xi*(15*xi2*abs_xi2 - 60*xi2*abs_xi + 55*xi2 +
3034         10*abs_xi4 - 96*abs_xi3 + 260*abs_xi2 - 210*abs_xi + 10)/
3035         24.0/abs_xi;
3036     } else if (abs_xi <= 3) {
3037       dphi = -xi*(abs_xi - 3)*(5*abs_xi3 - 37*abs_xi2 +
3038       84*abs_xi - 58)/24.0/abs_xi;
3039     } else {
3040       dphi = 0.0;
3041     }
3042 
3043   } else if (order == 8) {
3044     double xi2 = xi*xi;
3045     double xi4 = xi2*xi2;
3046     double xi6 = xi4*xi2;
3047     double abs_xi3 = xi2*abs_xi;
3048     double abs_xi5 = xi4*abs_xi;
3049     if (abs_xi == 0.0) {
3050       dphi = 0.0;
3051     } else if (abs_xi <= 1) {
3052       dphi = xi*(49*xi6 - 175*xi4 + 84*xi2 - 150*abs_xi5 +
3053         644*abs_xi3 - 560*abs_xi)/144.0/abs_xi;
3054     } else if (abs_xi <= 2) {
3055       dphi = xi*(-49*xi6 - 1365*xi4 + 756*xi2 +
3056         450*abs_xi5 + 1260*abs_xi3 - 1260*abs_xi + 28)/240.0/abs_xi;
3057     } else if (abs_xi <= 3) {
3058       dphi = xi*(49*xi6 + 4445*xi4 + 17724*xi2 -
3059         750*abs_xi5 - 12740*abs_xi3 - 9940*abs_xi + 756)/720.0/abs_xi;
3060     } else if (abs_xi <= 4) {
3061       dphi = -xi*(abs_xi - 4)*(7*abs_xi5 - 122*xi4 +
3062         807*abs_xi3 - 2512*xi2 + 3644*abs_xi - 1944)/720.0/abs_xi;
3063     } else {
3064       dphi = 0.0;
3065     }
3066 
3067   } else if (order == 10) {
3068     double xi2 = xi*xi;
3069     double xi4 = xi2*xi2;
3070     double xi6 = xi4*xi2;
3071     double xi8 = xi6*xi2;
3072     double abs_xi2 = abs_xi*abs_xi;
3073     double abs_xi3 = abs_xi2*abs_xi;
3074     double abs_xi4 = abs_xi2*abs_xi2;
3075     double abs_xi5 = abs_xi4*abs_xi;
3076     double abs_xi6 = abs_xi5*abs_xi;
3077     double abs_xi7 = abs_xi6*abs_xi;
3078     double abs_xi8 = abs_xi7*abs_xi;
3079     if (abs_xi == 0.0) {
3080       dphi = 0.0;
3081     } else if (abs_xi <= 1) {
3082       dphi = xi*(298*xi6*abs_xi - 72*xi6*abs_xi2 - 9*xi8 +
3083         126*xi6 + 30*xi4*abs_xi3 + 756*xi4*abs_xi2 - 3644*xi4*abs_xi -
3084         441*xi4 - 280*xi2*abs_xi3 - 1764*xi2*abs_xi2 + 12026*xi2*abs_xi +
3085         324*xi2 + 490*abs_xi3 + 648*abs_xi2 - 10792*abs_xi)/2880.0/abs_xi;
3086     } else if (abs_xi <= 2) {
3087       dphi = xi*(9*xi6*abs_xi2 - 72*xi6*abs_xi + 141*xi6 +
3088         18*xi4*abs_xi4 - 236*xi4*abs_xi3 + 963*xi4*abs_xi2 -
3089         1046*xi4*abs_xi - 687*xi4 - 20*xi2*abs_xi5 + 156*xi2*abs_xi4 +
3090         168*xi2*abs_xi3 - 3522*xi2*abs_xi2 + 6382*xi2*abs_xi + 474*xi2 +
3091         50*abs_xi5 - 516*abs_xi4 + 1262*abs_xi3 + 1596*abs_xi2 -
3092         6344*abs_xi + 72)/1440.0/abs_xi;
3093     } else if (abs_xi <= 3) {
3094       dphi = xi*(720*xi4*abs_xi3 - 45*xi4*abs_xi4 - 4185*xi4*abs_xi2 +
3095         10440*xi4*abs_xi - 9396*xi4 - 36*xi2*abs_xi6 + 870*xi2*abs_xi5 -
3096         7965*xi2*abs_xi4 + 34540*xi2*abs_xi3 - 70389*xi2*abs_xi2 +
3097         51440*xi2*abs_xi + 6012*xi2 + 50*abs_xi7 - 954*abs_xi6 +
3098         6680*abs_xi5 - 19440*abs_xi4 + 11140*abs_xi3 + 49014*abs_xi2 -
3099         69080*abs_xi + 3384)/10080.0/abs_xi;
3100     } else if (abs_xi <= 4) {
3101       dphi = xi*(63*xi2*abs_xi6 - 1512*xi2*abs_xi5 + 14490*xi2*abs_xi4 -
3102         70560*xi2*abs_xi3 + 182763*xi2*abs_xi2 - 236376*xi2*abs_xi +
3103         117612*xi2 + 18*abs_xi8 - 784*abs_xi7 + 12600*abs_xi6 -
3104         101556*abs_xi5 + 451962*abs_xi4 - 1121316*abs_xi3 +
3105         1451628*abs_xi2 - 795368*abs_xi + 71856)/40320.0/abs_xi;
3106     } else if (abs_xi <= 5) {
3107       dphi = -xi*(abs_xi - 5)*(9*abs_xi7 - 283*abs_xi6 +
3108         3667*abs_xi5 - 25261*abs_xi4 + 99340*abs_xi3 -
3109         221416*abs_xi2 + 256552*abs_xi - 117648)/40320.0/abs_xi;
3110     } else {
3111       dphi = 0.0;
3112     }
3113   }
3114 
3115   return dphi;
3116 }
3117 
3118 /* ----------------------------------------------------------------------
3119    Compute direct interaction (energy) weights for intermediate grid levels
3120 ------------------------------------------------------------------------- */
get_g_direct()3121 void MSM::get_g_direct()
3122 {
3123   if (g_direct) memory->destroy(g_direct);
3124   memory->create(g_direct,levels,nmax_direct,"msm:g_direct");
3125 
3126   double a = cutoff;
3127 
3128   int n,zk,zyk,k,ix,iy,iz;
3129   double xdiff,ydiff,zdiff;
3130   double dx,dy,dz;
3131   double tmp[3];
3132   double rsq,rho,two_n;
3133 
3134   two_n = 1.0;
3135 
3136   int nx = nxhi_direct - nxlo_direct + 1;
3137   int ny = nyhi_direct - nylo_direct + 1;
3138 
3139   for (n=0; n<levels; n++) {
3140 
3141     for (iz = nzlo_direct; iz <= nzhi_direct; iz++) {
3142       zdiff = iz/delzinv[n];
3143       zk = (iz + nzhi_direct)*ny;
3144       for (iy = nylo_direct; iy <= nyhi_direct; iy++) {
3145         ydiff = iy/delyinv[n];
3146         zyk = (zk + iy + nyhi_direct)*nx;
3147         for (ix = nxlo_direct; ix <= nxhi_direct; ix++) {
3148           xdiff = ix/delxinv[n];
3149 
3150           // transform grid point pair-wise distance from lamda (0-1) coords to box coords
3151 
3152           if (triclinic) {
3153             tmp[0] = xdiff;
3154             tmp[1] = ydiff;
3155             tmp[2] = zdiff;
3156             lamda2xvector(&tmp[0],&tmp[0]);
3157             dx = tmp[0];
3158             dy = tmp[1];
3159             dz = tmp[2];
3160           } else {
3161             dx = xdiff;
3162             dy = ydiff;
3163             dz = zdiff;
3164           }
3165 
3166           rsq = dx*dx + dy*dy + dz*dz;
3167 
3168           rho = sqrt(rsq)/(two_n*a);
3169           k = zyk + ix + nxhi_direct;
3170           g_direct[n][k] = gamma(rho)/(two_n*a) - gamma(rho/2.0)/(2.0*two_n*a);
3171         }
3172       }
3173     }
3174     two_n *= 2.0;
3175   }
3176 }
3177 
3178 /* ----------------------------------------------------------------------
3179    Compute direct interaction (virial) weights for intermediate grid levels
3180 ------------------------------------------------------------------------- */
3181 
get_virial_direct()3182 void MSM::get_virial_direct()
3183 {
3184   if (v0_direct) memory->destroy(v0_direct);
3185   memory->create(v0_direct,levels,nmax_direct,"msm:v0_direct");
3186   if (v1_direct) memory->destroy(v1_direct);
3187   memory->create(v1_direct,levels,nmax_direct,"msm:v1_direct");
3188   if (v2_direct) memory->destroy(v2_direct);
3189   memory->create(v2_direct,levels,nmax_direct,"msm:v2_direct");
3190   if (v3_direct) memory->destroy(v3_direct);
3191   memory->create(v3_direct,levels,nmax_direct,"msm:v3_direct");
3192   if (v4_direct) memory->destroy(v4_direct);
3193   memory->create(v4_direct,levels,nmax_direct,"msm:v4_direct");
3194   if (v5_direct) memory->destroy(v5_direct);
3195   memory->create(v5_direct,levels,nmax_direct,"msm:v5_direct");
3196 
3197   double a = cutoff;
3198   double a_sq = cutoff*cutoff;
3199 
3200   int n,zk,zyk,k,ix,iy,iz;
3201   double xdiff,ydiff,zdiff;
3202   double dx,dy,dz;
3203   double tmp[3];
3204   double rsq,r,rho,two_n,two_nsq,dg;
3205 
3206   two_n = 1.0;
3207 
3208   int nx = nxhi_direct - nxlo_direct + 1;
3209   int ny = nyhi_direct - nylo_direct + 1;
3210 
3211   for (n=0; n<levels; n++) {
3212     two_nsq = two_n * two_n;
3213 
3214     for (iz = nzlo_direct; iz <= nzhi_direct; iz++) {
3215       zdiff = iz/delzinv[n];
3216       zk = (iz + nzhi_direct)*ny;
3217       for (iy = nylo_direct; iy <= nyhi_direct; iy++) {
3218         ydiff = iy/delyinv[n];
3219         zyk = (zk + iy + nyhi_direct)*nx;
3220         for (ix = nxlo_direct; ix <= nxhi_direct; ix++) {
3221           xdiff = ix/delxinv[n];
3222 
3223           if (triclinic) {
3224             tmp[0] = xdiff;
3225             tmp[1] = ydiff;
3226             tmp[2] = zdiff;
3227             lamda2xvector(&tmp[0],&tmp[0]);
3228             dx = tmp[0];
3229             dy = tmp[1];
3230             dz = tmp[2];
3231           } else {
3232             dx = xdiff;
3233             dy = ydiff;
3234             dz = zdiff;
3235           }
3236 
3237           rsq = dx*dx + dy*dy + dz*dz;
3238           k = zyk + ix + nxhi_direct;
3239           r = sqrt(rsq);
3240           if (r == 0) {
3241             v0_direct[n][k] = 0.0;
3242             v1_direct[n][k] = 0.0;
3243             v2_direct[n][k] = 0.0;
3244             v3_direct[n][k] = 0.0;
3245             v4_direct[n][k] = 0.0;
3246             v5_direct[n][k] = 0.0;
3247           } else {
3248             rho = r/(two_n*a);
3249             dg = -(dgamma(rho)/(two_nsq*a_sq) -
3250               dgamma(rho/2.0)/(4.0*two_nsq*a_sq))/r;
3251             v0_direct[n][k] = dg * dx * dx;
3252             v1_direct[n][k] = dg * dy * dy;
3253             v2_direct[n][k] = dg * dz * dz;
3254             v3_direct[n][k] = dg * dx * dy;
3255             v4_direct[n][k] = dg * dx * dz;
3256             v5_direct[n][k] = dg * dy * dz;
3257           }
3258 
3259         }
3260       }
3261     }
3262     two_n *= 2.0;
3263   }
3264 }
3265 
3266 /* ----------------------------------------------------------------------
3267    Compute direct interaction (energy) weights for top grid level
3268    (nonperiodic systems only)
3269 ------------------------------------------------------------------------- */
3270 
get_g_direct_top(int n)3271 void MSM::get_g_direct_top(int n)
3272 {
3273   int nx_top = betax[n] - alpha[n];
3274   int ny_top = betay[n] - alpha[n];
3275   int nz_top = betaz[n] - alpha[n];
3276 
3277   int nx = 2*nx_top + 1;
3278   int ny = 2*ny_top + 1;
3279   int nz = 2*nz_top + 1;
3280 
3281   int nmax_top = 8*(nx+1)*(ny*1)*(nz+1);
3282 
3283   if (g_direct_top) memory->destroy(g_direct_top);
3284   memory->create(g_direct_top,nmax_top,"msm:g_direct_top");
3285 
3286   double a = cutoff;
3287 
3288   int zk,zyk,k,ix,iy,iz;
3289   double xdiff,ydiff,zdiff;
3290   double dx,dy,dz;
3291   double tmp[3];
3292   double rsq,rho,two_n;
3293 
3294   two_n = pow(2.0,n);
3295 
3296   for (iz = -nz_top; iz <= nz_top; iz++) {
3297     zdiff = iz/delzinv[n];
3298     zk = (iz + nz_top)*ny;
3299     for (iy = -ny_top; iy <= ny_top; iy++) {
3300       ydiff = iy/delyinv[n];
3301       zyk = (zk + iy + ny_top)*nx;
3302       for (ix = -nx_top; ix <= nx_top; ix++) {
3303         xdiff = ix/delxinv[n];
3304 
3305         if (triclinic) {
3306           tmp[0] = xdiff;
3307           tmp[1] = ydiff;
3308           tmp[2] = zdiff;
3309           lamda2xvector(&tmp[0],&tmp[0]);
3310           dx = tmp[0];
3311           dy = tmp[1];
3312           dz = tmp[2];
3313         } else {
3314           dx = xdiff;
3315           dy = ydiff;
3316           dz = zdiff;
3317         }
3318 
3319         rsq = dx*dx + dy*dy + dz*dz;
3320         rho = sqrt(rsq)/(two_n*a);
3321         k = zyk + ix + nx_top;
3322         g_direct_top[k] = gamma(rho)/(two_n*a);
3323       }
3324     }
3325   }
3326 }
3327 
3328 /* ----------------------------------------------------------------------
3329    Compute direct interaction (virial) weights for top grid level
3330    (nonperiodic systems only)
3331 ------------------------------------------------------------------------- */
3332 
get_virial_direct_top(int n)3333 void MSM::get_virial_direct_top(int n)
3334 {
3335   int nx_top = betax[n] - alpha[n];
3336   int ny_top = betay[n] - alpha[n];
3337   int nz_top = betaz[n] - alpha[n];
3338 
3339   int nx = 2*nx_top + 1;
3340   int ny = 2*ny_top + 1;
3341   int nz = 2*nz_top + 1;
3342 
3343   int nmax_top = 8*(nx+1)*(ny*1)*(nz+1);
3344 
3345   if (v0_direct_top) memory->destroy(v0_direct_top);
3346   memory->create(v0_direct_top,nmax_top,"msm:v0_direct_top");
3347   if (v1_direct_top) memory->destroy(v1_direct_top);
3348   memory->create(v1_direct_top,nmax_top,"msm:v1_direct_top");
3349   if (v2_direct_top) memory->destroy(v2_direct_top);
3350   memory->create(v2_direct_top,nmax_top,"msm:v2_direct_top");
3351   if (v3_direct_top) memory->destroy(v3_direct_top);
3352   memory->create(v3_direct_top,nmax_top,"msm:v3_direct_top");
3353   if (v4_direct_top) memory->destroy(v4_direct_top);
3354   memory->create(v4_direct_top,nmax_top,"msm:v4_direct_top");
3355   if (v5_direct_top) memory->destroy(v5_direct_top);
3356   memory->create(v5_direct_top,nmax_top,"msm:v5_direct_top");
3357 
3358   double a = cutoff;
3359   double a_sq = cutoff*cutoff;
3360 
3361   int zk,zyk,k,ix,iy,iz;
3362   double xdiff,ydiff,zdiff;
3363   double dx,dy,dz;
3364   double tmp[3];
3365   double rsq,r,rho,two_n,two_nsq,dg;
3366 
3367   two_n = pow(2.0,n);
3368   two_nsq = two_n * two_n;
3369 
3370   for (iz = -nz_top; iz <= nz_top; iz++) {
3371     zdiff = iz/delzinv[n];
3372     zk = (iz + nz_top)*ny;
3373     for (iy = -ny_top; iy <= ny_top; iy++) {
3374       ydiff = iy/delyinv[n];
3375       zyk = (zk + iy + ny_top)*nx;
3376       for (ix = -nx_top; ix <= nx_top; ix++) {
3377         xdiff = ix/delxinv[n];
3378         if (triclinic) {
3379           tmp[0] = xdiff;
3380           tmp[1] = ydiff;
3381           tmp[2] = zdiff;
3382           lamda2xvector(&tmp[0],&tmp[0]);
3383           dx = tmp[0];
3384           dy = tmp[1];
3385           dz = tmp[2];
3386         } else {
3387           dx = xdiff;
3388           dy = ydiff;
3389           dz = zdiff;
3390         }
3391 
3392         rsq = dx*dx + dy*dy + dz*dz;
3393         k = zyk + ix + nx_top;
3394         r = sqrt(rsq);
3395         if (r == 0) {
3396           v0_direct_top[k] = 0.0;
3397           v1_direct_top[k] = 0.0;
3398           v2_direct_top[k] = 0.0;
3399           v3_direct_top[k] = 0.0;
3400           v4_direct_top[k] = 0.0;
3401           v5_direct_top[k] = 0.0;
3402         } else {
3403           rho = r/(two_n*a);
3404           dg = -(dgamma(rho)/(two_nsq*a_sq))/r;
3405           v0_direct_top[k] = dg * dx * dx;
3406           v1_direct_top[k] = dg * dy * dy;
3407           v2_direct_top[k] = dg * dz * dz;
3408           v3_direct_top[k] = dg * dx * dy;
3409           v4_direct_top[k] = dg * dx * dz;
3410           v5_direct_top[k] = dg * dy * dz;
3411         }
3412       }
3413     }
3414   }
3415 }
3416 
3417 /* ----------------------------------------------------------------------
3418    memory usage of local arrays
3419 ------------------------------------------------------------------------- */
3420 
memory_usage()3421 double MSM::memory_usage()
3422 {
3423   double bytes = 0;
3424 
3425   // NOTE: Stan, fill in other memory allocations here
3426 
3427   // all GridComm bufs
3428 
3429   bytes += (double)(ngcall_buf1 + ngcall_buf2) * npergrid * sizeof(double);
3430 
3431   for (int n=0; n<levels; n++)
3432     if (active_flag[n])
3433       bytes += (double)(ngc_buf1[n] + ngc_buf2[n]) * npergrid * sizeof(double);
3434 
3435   return bytes;
3436 }
3437