1 // clang-format off
2 /* ----------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 Copyright (2003) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14
15 /* ----------------------------------------------------------------------
16 Contributing authors: Paul Crozier, Stan Moore, Stephen Bond, (all SNL)
17 ------------------------------------------------------------------------- */
18
19 #include "msm.h"
20
21 #include "atom.h"
22 #include "comm.h"
23 #include "domain.h"
24 #include "error.h"
25 #include "force.h"
26 #include "gridcomm.h"
27 #include "math_const.h"
28 #include "memory.h"
29 #include "neighbor.h"
30 #include "pair.h"
31
32 #include <cstring>
33 #include <cmath>
34
35 using namespace LAMMPS_NS;
36 using namespace MathConst;
37
38 #define MAX_LEVELS 10
39 #define OFFSET 16384
40 #define SMALL 0.00001
41
42 enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
43 enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
44
45 /* ---------------------------------------------------------------------- */
46
MSM(LAMMPS * lmp)47 MSM::MSM(LAMMPS *lmp)
48 : KSpace(lmp),
49 factors(nullptr), delxinv(nullptr), delyinv(nullptr), delzinv(nullptr), nx_msm(nullptr),
50 ny_msm(nullptr), nz_msm(nullptr), nxlo_in(nullptr), nylo_in(nullptr), nzlo_in(nullptr),
51 nxhi_in(nullptr), nyhi_in(nullptr), nzhi_in(nullptr), nxlo_out(nullptr), nylo_out(nullptr),
52 nzlo_out(nullptr), nxhi_out(nullptr), nyhi_out(nullptr), nzhi_out(nullptr), ngrid(nullptr),
53 active_flag(nullptr), alpha(nullptr), betax(nullptr), betay(nullptr), betaz(nullptr),
54 peratom_allocate_flag(0),levels(0),world_levels(nullptr),qgrid(nullptr),egrid(nullptr),
55 v0grid(nullptr), v1grid(nullptr),v2grid(nullptr),v3grid(nullptr),v4grid(nullptr),v5grid(nullptr),
56 g_direct(nullptr),v0_direct(nullptr),v1_direct(nullptr),v2_direct(nullptr),v3_direct(nullptr),
57 v4_direct(nullptr),v5_direct(nullptr),g_direct_top(nullptr),v0_direct_top(nullptr),
58 v1_direct_top(nullptr),v2_direct_top(nullptr),v3_direct_top(nullptr),v4_direct_top(nullptr),
59 v5_direct_top(nullptr),phi1d(nullptr),dphi1d(nullptr),procneigh_levels(nullptr),gcall(nullptr),
60 gc(nullptr),gcall_buf1(nullptr),gcall_buf2(nullptr),gc_buf1(nullptr),gc_buf2(nullptr),
61 ngc_buf1(nullptr),ngc_buf2(nullptr),part2grid(nullptr),boxlo(nullptr)
62 {
63 msmflag = 1;
64
65 nfactors = 1;
66 factors = new int[nfactors];
67 factors[0] = 2;
68
69 MPI_Comm_rank(world,&me);
70
71 phi1d = dphi1d = nullptr;
72
73 nmax = 0;
74 part2grid = nullptr;
75
76 g_direct = nullptr;
77 g_direct_top = nullptr;
78
79 v0_direct = v1_direct = v2_direct = nullptr;
80 v3_direct = v4_direct = v5_direct = nullptr;
81
82 v0_direct_top = v1_direct_top = v2_direct_top = nullptr;
83 v3_direct_top = v4_direct_top = v5_direct_top = nullptr;
84
85 ngrid = nullptr;
86
87 alpha = betax = betay = betaz = nullptr;
88 nx_msm = ny_msm = nz_msm = nullptr;
89 nxlo_in = nylo_in = nzlo_in = nullptr;
90 nxhi_in = nyhi_in = nzhi_in = nullptr;
91 nxlo_out = nylo_out = nzlo_out = nullptr;
92 nxhi_out = nyhi_out = nzhi_out = nullptr;
93 delxinv = delyinv = delzinv = nullptr;
94 qgrid = nullptr;
95 egrid = nullptr;
96 v0grid = v1grid = v2grid = v3grid = v4grid = v5grid = nullptr;
97
98 peratom_allocate_flag = 0;
99 scalar_pressure_flag = 1;
100 warn_nonneutral = 0;
101
102 order = 10;
103 }
104
105 /* ---------------------------------------------------------------------- */
106
settings(int narg,char ** arg)107 void MSM::settings(int narg, char **arg)
108 {
109 if (narg < 1) error->all(FLERR,"Illegal kspace_style msm command");
110 accuracy_relative = fabs(utils::numeric(FLERR,arg[0],false,lmp));
111 }
112
113 /* ----------------------------------------------------------------------
114 free all memory
115 ------------------------------------------------------------------------- */
116
~MSM()117 MSM::~MSM()
118 {
119 delete [] factors;
120 deallocate();
121 if (peratom_allocate_flag) deallocate_peratom();
122 deallocate_levels();
123 memory->destroy(part2grid);
124 memory->destroy(g_direct);
125 memory->destroy(g_direct_top);
126 memory->destroy(v0_direct);
127 memory->destroy(v1_direct);
128 memory->destroy(v2_direct);
129 memory->destroy(v3_direct);
130 memory->destroy(v4_direct);
131 memory->destroy(v5_direct);
132 memory->destroy(v0_direct_top);
133 memory->destroy(v1_direct_top);
134 memory->destroy(v2_direct_top);
135 memory->destroy(v3_direct_top);
136 memory->destroy(v4_direct_top);
137 memory->destroy(v5_direct_top);
138 }
139
140 /* ----------------------------------------------------------------------
141 called once before run
142 ------------------------------------------------------------------------- */
143
init()144 void MSM::init()
145 {
146 if (me == 0) utils::logmesg(lmp,"MSM initialization ...\n");
147
148 // error check
149
150 triclinic_check();
151 if (domain->dimension == 2)
152 error->all(FLERR,"Cannot (yet) use MSM with 2d simulation");
153 if (comm->style != 0)
154 error->universe_all(FLERR,"MSM can only currently be used with "
155 "comm_style brick");
156
157 if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
158
159 if ((slabflag == 1) && (me == 0))
160 error->warning(FLERR,"Slab correction not needed for MSM");
161
162 if ((order < 4) || (order > 10) || (order%2 != 0))
163 error->all(FLERR,"MSM order must be 4, 6, 8, or 10");
164
165 if (sizeof(FFT_SCALAR) != 8)
166 error->all(FLERR,"Cannot (yet) use single precision with MSM "
167 "(remove -DFFT_SINGLE from Makefile and re-compile)");
168
169 // compute two charge force
170
171 two_charge();
172
173 // extract short-range Coulombic cutoff from pair style
174
175 triclinic = domain->triclinic;
176 pair_check();
177
178 int itmp;
179 double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
180 if (p_cutoff == nullptr)
181 error->all(FLERR,"KSpace style is incompatible with Pair style");
182 cutoff = *p_cutoff;
183
184 // compute qsum & qsqsum and error if not charge-neutral
185
186 scale = 1.0;
187 qqrd2e = force->qqrd2e;
188 qsum_qsq();
189 natoms_original = atom->natoms;
190
191 // set accuracy (force units) from accuracy_relative or accuracy_absolute
192
193 if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
194 else accuracy = accuracy_relative * two_charge_force;
195
196 // setup MSM grid resolution
197
198 set_grid_global();
199 setup();
200
201 double estimated_error = estimate_total_error();
202
203 // output grid stats
204
205 int ngrid_max;
206 MPI_Allreduce(&ngrid[0],&ngrid_max,1,MPI_INT,MPI_MAX,world);
207
208 if (me == 0) {
209 std::string mesg = fmt::format(" 3d grid size/proc = {}\n", ngrid_max);
210 mesg += fmt::format(" estimated absolute RMS force accuracy = {:.8}\n",
211 estimated_error);
212 mesg += fmt::format(" estimated relative force accuracy = {:.8}\n",
213 estimated_error/two_charge_force);
214 mesg += fmt::format(" grid = {} {} {}\n",nx_msm[0],ny_msm[0],nz_msm[0]);
215 mesg += fmt::format(" order = {}\n",order);
216 utils::logmesg(lmp,mesg);
217 }
218 }
219
220 /* ----------------------------------------------------------------------
221 estimate 1d grid RMS force error for MSM
222 ------------------------------------------------------------------------- */
223
estimate_1d_error(double h,double prd)224 double MSM::estimate_1d_error(double h, double prd)
225 {
226 double a = cutoff;
227 int p = order - 1;
228
229 double Mp,cprime,error_scaling;
230 Mp = cprime = error_scaling = 1;
231 // Mp values from Table 5.1 of Hardy's thesis
232 // cprime values from equation 4.17 of Hardy's thesis
233 // error scaling from empirical fitting to convert to rms force errors
234 if (p == 3) {
235 Mp = 9;
236 cprime = 1.0/6.0;
237 error_scaling = 0.39189561;
238 } else if (p == 5) {
239 Mp = 825;
240 cprime = 1.0/30.0;
241 error_scaling = 0.150829428;
242 } else if (p == 7) {
243 Mp = 130095;
244 cprime = 1.0/140.0;
245 error_scaling = 0.049632967;
246 } else if (p == 9) {
247 Mp = 34096545;
248 cprime = 1.0/630.0;
249 error_scaling = 0.013520855;
250 } else {
251 error->all(FLERR,"MSM order must be 4, 6, 8, or 10");
252 }
253
254 // equation 4.1 from Hardy's thesis
255 C_p = 4.0*cprime*Mp/3.0;
256
257 // use empirical parameters to convert to rms force errors
258 C_p *= error_scaling;
259
260 // equation 3.197 from Hardy's thesis
261 double error_1d = C_p*pow(h,(p-1))/pow(a,(p+1));
262
263 // include dependency of error on other terms
264 error_1d *= q2*a/(prd*sqrt(double(atom->natoms)));
265
266 return error_1d;
267 }
268
269 /* ----------------------------------------------------------------------
270 estimate 3d grid RMS force error
271 ------------------------------------------------------------------------- */
272
estimate_3d_error()273 double MSM::estimate_3d_error()
274 {
275 double xprd = domain->xprd;
276 double yprd = domain->yprd;
277 double zprd = domain->zprd;
278 double error_x = estimate_1d_error(h_x,xprd);
279 double error_y = estimate_1d_error(h_y,yprd);
280 double error_z = estimate_1d_error(h_z,zprd);
281 double error_3d =
282 sqrt(error_x*error_x + error_y*error_y + error_z*error_z) / sqrt(3.0);
283 return error_3d;
284 }
285
286 /* ----------------------------------------------------------------------
287 estimate total RMS force error
288 ------------------------------------------------------------------------- */
289
estimate_total_error()290 double MSM::estimate_total_error()
291 {
292 double xprd = domain->xprd;
293 double yprd = domain->yprd;
294 double zprd = domain->zprd;
295 bigint natoms = atom->natoms;
296
297 double grid_error = estimate_3d_error();
298 double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
299 double short_range_error = 0.0;
300 double table_error =
301 estimate_table_accuracy(q2_over_sqrt,short_range_error);
302 double estimated_total_error = sqrt(grid_error*grid_error +
303 short_range_error*short_range_error + table_error*table_error);
304
305 return estimated_total_error;
306 }
307
308 /* ----------------------------------------------------------------------
309 adjust MSM coeffs, called initially and whenever volume has changed
310 ------------------------------------------------------------------------- */
311
setup()312 void MSM::setup()
313 {
314 double *prd;
315 double a = cutoff;
316
317 // volume-dependent factors
318
319 prd = domain->prd;
320
321 double xprd = prd[0];
322 double yprd = prd[1];
323 double zprd = prd[2];
324 volume = xprd * yprd * zprd;
325
326 // loop over grid levels and compute grid spacing
327
328 for (int n=0; n<levels; n++) {
329 if (triclinic == 0) {
330 delxinv[n] = nx_msm[n]/xprd;
331 delyinv[n] = ny_msm[n]/yprd;
332 delzinv[n] = nz_msm[n]/zprd;
333 } else { // use lamda (0-1) coordinates
334 delxinv[n] = nx_msm[n];
335 delyinv[n] = ny_msm[n];
336 delzinv[n] = nz_msm[n];
337 }
338 }
339
340 double ax = a;
341 double ay = a;
342 double az = a;
343
344 // transform the interaction sphere in box coords to an
345 // ellipsoid in lamda (0-1) coords to
346 // get the direct sum interaction limits for a triclinic system
347
348 if (triclinic) {
349 double tmp[3];
350 kspacebbox(a,&tmp[0]);
351 ax = tmp[0];
352 ay = tmp[1];
353 az = tmp[2];
354 }
355
356 // direct sum interaction limits
357
358 nxhi_direct = static_cast<int> (2.0*ax*delxinv[0]);
359 nxlo_direct = -nxhi_direct;
360 nyhi_direct = static_cast<int> (2.0*ay*delyinv[0]);
361 nylo_direct = -nyhi_direct;
362 nzhi_direct = static_cast<int> (2.0*az*delzinv[0]);
363 nzlo_direct = -nzhi_direct;
364
365 nmax_direct = 8*(nxhi_direct+1)*(nyhi_direct+1)*(nzhi_direct+1);
366
367 deallocate();
368 if (peratom_allocate_flag) deallocate_peratom();
369
370 // compute direct sum interaction weights
371
372 if (!peratom_allocate_flag) { // Timestep 0
373 get_g_direct();
374 get_virial_direct();
375 if (domain->nonperiodic) {
376 get_g_direct_top(levels-1);
377 get_virial_direct_top(levels-1);
378 }
379 } else {
380 get_g_direct();
381 if (domain->nonperiodic) get_g_direct_top(levels-1);
382 if (vflag_either && !scalar_pressure_flag) {
383 get_virial_direct();
384 if (domain->nonperiodic) get_virial_direct_top(levels-1);
385 }
386 }
387
388 if (!triclinic)
389 boxlo = domain->boxlo;
390 else
391 boxlo = domain->boxlo_lamda;
392
393 // ghost grid points depend on direct sum interaction limits,
394 // so need to re-compute local grid
395
396 set_grid_local();
397
398 // allocate K-space dependent memory
399 // don't invoke allocate_peratom(), compute() will allocate when needed
400
401 allocate();
402 }
403
404 /* ----------------------------------------------------------------------
405 compute the MSM long-range force, energy, virial
406 ------------------------------------------------------------------------- */
407
compute(int eflag,int vflag)408 void MSM::compute(int eflag, int vflag)
409 {
410 int i,j;
411
412 // set energy/virial flags
413
414 ev_init(eflag,vflag);
415
416 if (scalar_pressure_flag && vflag_either) {
417 if (vflag_atom)
418 error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' to obtain "
419 "per-atom virial with kspace_style MSM");
420
421 // must switch on global energy computation if not already on
422
423 if (eflag == 0 || eflag == 2) {
424 eflag++;
425 ev_setup(eflag,vflag);
426 }
427 }
428
429 // if atom count has changed, update qsum and qsqsum
430
431 if (atom->natoms != natoms_original) {
432 qsum_qsq();
433 natoms_original = atom->natoms;
434 }
435
436 // return if there are no charges
437
438 if (qsqsum == 0.0) return;
439
440 // invoke allocate_peratom() if needed for first time
441
442 if (vflag_atom && !peratom_allocate_flag) allocate_peratom();
443
444 // convert atoms from box to lamda coords
445
446 if (triclinic)
447 domain->x2lamda(atom->nlocal);
448
449 // extend size of per-atom arrays if necessary
450
451 if (atom->nmax > nmax) {
452 memory->destroy(part2grid);
453 nmax = atom->nmax;
454 memory->create(part2grid,nmax,3,"msm:part2grid");
455 }
456
457 // find grid points for all my particles
458 // map my particle charge onto my local 3d density grid (aninterpolation)
459
460 particle_map();
461 make_rho();
462
463 // all procs reverse communicate charge density values from
464 // their ghost grid points
465 // to fully sum contribution in their 3d grid
466
467 current_level = 0;
468 gcall->reverse_comm(GridComm::KSPACE,this,1,sizeof(double),
469 REVERSE_RHO,gcall_buf1,gcall_buf2,MPI_DOUBLE);
470
471 // forward communicate charge density values to fill ghost grid points
472 // compute direct sum interaction and then restrict to coarser grid
473
474 for (int n=0; n<=levels-2; n++) {
475 if (!active_flag[n]) continue;
476 current_level = n;
477 gc[n]->forward_comm(GridComm::KSPACE,this,1,sizeof(double),
478 FORWARD_RHO,gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
479 direct(n);
480 restriction(n);
481 }
482
483 // compute direct interation for top grid level for non-periodic
484 // and for second from top grid level for periodic
485
486 if (active_flag[levels-1]) {
487 if (domain->nonperiodic) {
488 current_level = levels-1;
489 gc[levels-1]->
490 forward_comm(GridComm::KSPACE,this,1,sizeof(double),
491 FORWARD_RHO,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
492 direct_top(levels-1);
493 gc[levels-1]->
494 reverse_comm(GridComm::KSPACE,this,1,sizeof(double),
495 REVERSE_AD,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
496 if (vflag_atom)
497 gc[levels-1]->
498 reverse_comm(GridComm::KSPACE,this,6,sizeof(double),
499 REVERSE_AD_PERATOM,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
500
501 } else {
502 // Here using MPI_Allreduce is cheaper than using commgrid
503 grid_swap_forward(levels-1,qgrid[levels-1]);
504 direct(levels-1);
505 grid_swap_reverse(levels-1,egrid[levels-1]);
506 current_level = levels-1;
507 if (vflag_atom)
508 gc[levels-1]->
509 reverse_comm(GridComm::KSPACE,this,6,sizeof(double),
510 REVERSE_AD_PERATOM,gc_buf1[levels-1],gc_buf2[levels-1],MPI_DOUBLE);
511 }
512 }
513
514 // prolongate energy/virial from coarser grid to finer grid
515 // reverse communicate from ghost grid points to get full sum
516
517 for (int n=levels-2; n>=0; n--) {
518 if (!active_flag[n]) continue;
519 prolongation(n);
520
521 current_level = n;
522 gc[n]->reverse_comm(GridComm::KSPACE,this,1,sizeof(double),
523 REVERSE_AD,gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
524
525 // extra per-atom virial communication
526
527 if (vflag_atom)
528 gc[n]->reverse_comm(GridComm::KSPACE,this,6,sizeof(double),
529 REVERSE_AD_PERATOM,gc_buf1[n],gc_buf2[n],MPI_DOUBLE);
530 }
531
532 // all procs communicate E-field values
533 // to fill ghost cells surrounding their 3d bricks
534
535 current_level = 0;
536 gcall->forward_comm(GridComm::KSPACE,this,1,sizeof(double),
537 FORWARD_AD,gcall_buf1,gcall_buf2,MPI_DOUBLE);
538
539 // extra per-atom energy/virial communication
540
541 if (vflag_atom)
542 gcall->forward_comm(GridComm::KSPACE,this,6,sizeof(double),
543 FORWARD_AD_PERATOM,gcall_buf1,gcall_buf2,MPI_DOUBLE);
544
545 // calculate the force on my particles (interpolation)
546
547 fieldforce();
548
549 // calculate the per-atom energy/virial for my particles
550
551 if (evflag_atom) fieldforce_peratom();
552
553 // sum global energy across procs and add in self-energy term
554
555 const double qscale = qqrd2e * scale;
556
557 if (eflag_global) {
558 double energy_all;
559 MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
560 energy = energy_all;
561
562 double e_self = qsqsum*gamma(0.0)/cutoff;
563 energy -= e_self;
564 energy *= 0.5*qscale;
565 }
566
567 // total long-range virial
568
569 if (vflag_global && !scalar_pressure_flag) {
570 double virial_all[6];
571 MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
572 for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*virial_all[i];
573 }
574
575 // fast compute of scalar pressure (if requested)
576
577 if (scalar_pressure_flag && vflag_global)
578 for (i = 0; i < 3; i++) virial[i] = energy/3.0;
579
580 // per-atom energy/virial
581 // energy includes self-energy correction
582
583 if (evflag_atom) {
584 double *q = atom->q;
585 int nlocal = atom->nlocal;
586
587 if (eflag_atom) {
588 for (i = 0; i < nlocal; i++) {
589 eatom[i] -= q[i]*q[i]*gamma(0.0)/cutoff;
590 eatom[i] *= 0.5*qscale;
591 }
592 }
593
594 if (vflag_atom) {
595 for (i = 0; i < nlocal; i++)
596 for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
597 }
598 }
599
600 // convert atoms back from lamda to box coords
601
602 if (triclinic) domain->lamda2x(atom->nlocal);
603 }
604
605 /* ----------------------------------------------------------------------
606 allocate memory that depends on # of grid points
607 ------------------------------------------------------------------------- */
608
allocate()609 void MSM::allocate()
610 {
611 // interpolation coeffs
612
613 order_allocated = order;
614 memory->create2d_offset(phi1d,3,-order,order,"msm:phi1d");
615 memory->create2d_offset(dphi1d,3,-order,order,"msm:dphi1d");
616
617 // commgrid using all processors for finest grid level
618
619 gcall = new GridComm(lmp,world,1,nx_msm[0],ny_msm[0],nz_msm[0],
620 nxlo_in[0],nxhi_in[0],nylo_in[0],
621 nyhi_in[0],nzlo_in[0],nzhi_in[0],
622 nxlo_out_all,nxhi_out_all,nylo_out_all,
623 nyhi_out_all,nzlo_out_all,nzhi_out_all,
624 nxlo_out[0],nxhi_out[0],nylo_out[0],
625 nyhi_out[0],nzlo_out[0],nzhi_out[0]);
626
627 gcall->setup(ngcall_buf1,ngcall_buf2);
628 npergrid = 1;
629 memory->create(gcall_buf1,npergrid*ngcall_buf1,"msm:gcall_buf1");
630 memory->create(gcall_buf2,npergrid*ngcall_buf2,"msm:gcall_buf2");
631
632 // allocate memory for each grid level
633
634 for (int n=0; n<levels; n++) {
635 memory->create3d_offset(qgrid[n],nzlo_out[n],nzhi_out[n],
636 nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:qgrid");
637
638 memory->create3d_offset(egrid[n],nzlo_out[n],nzhi_out[n],
639 nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:egrid");
640
641 // create commgrid object for rho and electric field communication
642
643 if (active_flag[n]) {
644 delete gc[n];
645 int **procneigh = procneigh_levels[n];
646
647 gc[n] = new GridComm(lmp,world_levels[n],2,nx_msm[n],ny_msm[n],nz_msm[n],
648 nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],
649 nzlo_in[n],nzhi_in[n],
650 nxlo_out[n],nxhi_out[n],nylo_out[n],nyhi_out[n],
651 nzlo_out[n],nzhi_out[n],
652 procneigh[0][0],procneigh[0][1],procneigh[1][0],
653 procneigh[1][1],procneigh[2][0],procneigh[2][1]);
654
655 gc[n]->setup(ngc_buf1[n],ngc_buf2[n]);
656 npergrid = 1;
657 memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"msm:gc_buf1");
658 memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"msm:gc_buf2");
659 } else {
660 delete gc[n];
661 memory->destroy(gc_buf1[n]);
662 memory->destroy(gc_buf2[n]);
663 gc[n] = nullptr;
664 gc_buf1[n] = gc_buf2[n] = nullptr;
665 }
666 }
667 }
668
669 /* ----------------------------------------------------------------------
670 deallocate memory that depends on # of grid points
671 ------------------------------------------------------------------------- */
672
deallocate()673 void MSM::deallocate()
674 {
675 memory->destroy2d_offset(phi1d,-order_allocated);
676 memory->destroy2d_offset(dphi1d,-order_allocated);
677
678 if (gcall) delete gcall;
679 memory->destroy(gcall_buf1);
680 memory->destroy(gcall_buf2);
681 gcall = nullptr;
682 gcall_buf1 = gcall_buf2 = nullptr;
683 }
684
685 /* ----------------------------------------------------------------------
686 allocate per-atom virial memory that depends on # of grid points
687 ------------------------------------------------------------------------- */
688
allocate_peratom()689 void MSM::allocate_peratom()
690 {
691 peratom_allocate_flag = 1;
692
693 // create commgrid object for per-atom virial using all processors
694
695 npergrid = 6;
696 memory->destroy(gcall_buf1);
697 memory->destroy(gcall_buf2);
698 memory->create(gcall_buf1,npergrid*ngcall_buf1,"pppm:gcall_buf1");
699 memory->create(gcall_buf2,npergrid*ngcall_buf2,"pppm:gcall_buf2");
700
701 // allocate memory for each grid level
702
703 for (int n=0; n<levels; n++) {
704 memory->create3d_offset(v0grid[n],nzlo_out[n],nzhi_out[n],
705 nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v0grid");
706 memory->create3d_offset(v1grid[n],nzlo_out[n],nzhi_out[n],
707 nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v1grid");
708 memory->create3d_offset(v2grid[n],nzlo_out[n],nzhi_out[n],
709 nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v2grid");
710 memory->create3d_offset(v3grid[n],nzlo_out[n],nzhi_out[n],
711 nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v3grid");
712 memory->create3d_offset(v4grid[n],nzlo_out[n],nzhi_out[n],
713 nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v4grid");
714 memory->create3d_offset(v5grid[n],nzlo_out[n],nzhi_out[n],
715 nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:v5grid");
716
717 // create commgrid object for per-atom virial
718
719 if (active_flag[n]) {
720 npergrid = 6;
721 memory->destroy(gc_buf1[n]);
722 memory->destroy(gc_buf2[n]);
723 memory->create(gc_buf1[n],npergrid*ngc_buf1[n],"pppm:gc_buf1");
724 memory->create(gc_buf2[n],npergrid*ngc_buf2[n],"pppm:gc_buf2");
725 }
726 }
727 }
728
729 /* ----------------------------------------------------------------------
730 deallocate per-atom virial memory that depends on # of grid points
731 ------------------------------------------------------------------------- */
732
deallocate_peratom()733 void MSM::deallocate_peratom()
734 {
735 peratom_allocate_flag = 0;
736
737 for (int n=0; n<levels; n++) {
738 if (v0grid[n])
739 memory->destroy3d_offset(v0grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
740 if (v1grid[n])
741 memory->destroy3d_offset(v1grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
742 if (v2grid[n])
743 memory->destroy3d_offset(v2grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
744 if (v3grid[n])
745 memory->destroy3d_offset(v3grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
746 if (v4grid[n])
747 memory->destroy3d_offset(v4grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
748 if (v5grid[n])
749 memory->destroy3d_offset(v5grid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
750 }
751 }
752
753 /* ----------------------------------------------------------------------
754 allocate memory that depends on # of grid levels
755 ------------------------------------------------------------------------- */
756
allocate_levels()757 void MSM::allocate_levels()
758 {
759 ngrid = new int[levels];
760
761 gc = new GridComm*[levels];
762 gc_buf1 = new double*[levels];
763 gc_buf2 = new double*[levels];
764 ngc_buf1 = new int[levels];
765 ngc_buf2 = new int[levels];
766
767 memory->create(procneigh_levels,levels,3,2,"msm:procneigh_levels");
768 world_levels = new MPI_Comm[levels];
769 active_flag = new int[levels];
770
771 alpha = new int[levels];
772 betax = new int[levels];
773 betay = new int[levels];
774 betaz = new int[levels];
775
776 nx_msm = new int[levels];
777 ny_msm = new int[levels];
778 nz_msm = new int[levels];
779
780 nxlo_in = new int[levels];
781 nylo_in = new int[levels];
782 nzlo_in = new int[levels];
783
784 nxhi_in = new int[levels];
785 nyhi_in = new int[levels];
786 nzhi_in = new int[levels];
787
788 nxlo_out = new int[levels];
789 nylo_out = new int[levels];
790 nzlo_out = new int[levels];
791
792 nxhi_out = new int[levels];
793 nyhi_out = new int[levels];
794 nzhi_out = new int[levels];
795
796 delxinv = new double[levels];
797 delyinv = new double[levels];
798 delzinv = new double[levels];
799
800 qgrid = new double***[levels];
801 egrid = new double***[levels];
802
803 v0grid = new double***[levels];
804 v1grid = new double***[levels];
805 v2grid = new double***[levels];
806 v3grid = new double***[levels];
807 v4grid = new double***[levels];
808 v5grid = new double***[levels];
809
810 for (int n=0; n<levels; n++) {
811 gc[n] = nullptr;
812
813 gc_buf1[n] = nullptr;
814 gc_buf2[n] = nullptr;
815
816 world_levels[n] = MPI_COMM_NULL;
817
818 qgrid[n] = nullptr;
819 egrid[n] = nullptr;
820
821 v0grid[n] = nullptr;
822 v1grid[n] = nullptr;
823 v2grid[n] = nullptr;
824 v3grid[n] = nullptr;
825 v4grid[n] = nullptr;
826 v5grid[n] = nullptr;
827 }
828 }
829
830 /* ----------------------------------------------------------------------
831 deallocate memory that depends on # of grid levels
832 ------------------------------------------------------------------------- */
833
deallocate_levels()834 void MSM::deallocate_levels()
835 {
836 if (world_levels) {
837 for (int n=0; n < levels; ++n) {
838 if (qgrid[n])
839 memory->destroy3d_offset(qgrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
840
841 if (egrid[n])
842 memory->destroy3d_offset(egrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
843
844 if (gc) {
845 if (gc[n]) {
846 delete gc[n];
847 memory->destroy(gc_buf1[n]);
848 memory->destroy(gc_buf2[n]);
849 gc[n] = nullptr;
850 gc_buf1[n] = gc_buf2[n] = nullptr;
851 }
852 }
853
854 if (world_levels[n] != MPI_COMM_NULL) {
855 MPI_Comm_free(&world_levels[n]);
856 }
857 }
858 }
859
860 delete [] ngrid;
861 ngrid = nullptr;
862
863 memory->destroy(procneigh_levels);
864 delete [] world_levels;
865 delete [] active_flag;
866
867 delete [] gc;
868 delete [] gc_buf1;
869 delete [] gc_buf2;
870 delete [] ngc_buf1;
871 delete [] ngc_buf2;
872
873 delete [] alpha;
874 delete [] betax;
875 delete [] betay;
876 delete [] betaz;
877
878 delete [] nx_msm;
879 delete [] ny_msm;
880 delete [] nz_msm;
881
882 delete [] nxlo_in;
883 delete [] nylo_in;
884 delete [] nzlo_in;
885
886 delete [] nxhi_in;
887 delete [] nyhi_in;
888 delete [] nzhi_in;
889
890 delete [] nxlo_out;
891 delete [] nylo_out;
892 delete [] nzlo_out;
893
894 delete [] nxhi_out;
895 delete [] nyhi_out;
896 delete [] nzhi_out;
897
898 delete [] delxinv;
899 delete [] delyinv;
900 delete [] delzinv;
901
902 delete [] qgrid;
903 delete [] egrid;
904
905 delete [] v0grid;
906 delete [] v1grid;
907 delete [] v2grid;
908 delete [] v3grid;
909 delete [] v4grid;
910 delete [] v5grid;
911
912 world_levels = nullptr;
913 active_flag = nullptr;
914 gc = nullptr;
915 gc_buf1 = gc_buf2 = nullptr;
916
917 alpha = nullptr;
918 betax = nullptr;
919 betay = nullptr;
920 betaz = nullptr;
921
922 nx_msm = nullptr;
923 ny_msm = nullptr;
924 nz_msm = nullptr;
925
926 nxlo_in = nullptr;
927 nylo_in = nullptr;
928 nzlo_in = nullptr;
929
930 nxhi_in = nullptr;
931 nyhi_in = nullptr;
932 nzhi_in = nullptr;
933
934 nxlo_out = nullptr;
935 nylo_out = nullptr;
936 nzlo_out = nullptr;
937
938 nxhi_out = nullptr;
939 nyhi_out = nullptr;
940 nzhi_out = nullptr;
941
942 delxinv = nullptr;
943 delyinv = nullptr;
944 delzinv = nullptr;
945
946 qgrid = nullptr;
947 egrid = nullptr;
948
949 v0grid = nullptr;
950 v1grid = nullptr;
951 v2grid = nullptr;
952 v3grid = nullptr;
953 v4grid = nullptr;
954 v5grid = nullptr;
955 }
956
957 /* ----------------------------------------------------------------------
958 set total size of MSM grids
959 ------------------------------------------------------------------------- */
960
set_grid_global()961 void MSM::set_grid_global()
962 {
963 if (accuracy_relative <= 0.0)
964 error->all(FLERR,"KSpace accuracy must be > 0");
965
966 double xprd = domain->xprd;
967 double yprd = domain->yprd;
968 double zprd = domain->zprd;
969
970 int nx_max,ny_max,nz_max;
971 double hx,hy,hz;
972
973 if (adjust_cutoff_flag && !gridflag) {
974 // seek to choose optimal Coulombic cutoff and number of grid levels
975 // (based on a cost estimate in Hardy's thesis)
976 int p = order - 1;
977 double hmin = 3072.0*(p+1)/(p-1)/
978 (448.0*MY_PI + 56.0*MY_PI*order/2 + 1701.0);
979 hmin = pow(hmin,1.0/6.0)*pow(xprd*yprd*zprd/atom->natoms,1.0/3.0);
980
981 nx_max = static_cast<int>(xprd/hmin);
982 ny_max = static_cast<int>(yprd/hmin);
983 nz_max = static_cast<int>(zprd/hmin);
984
985 nx_max = MAX(nx_max,2);
986 ny_max = MAX(ny_max,2);
987 nz_max = MAX(nz_max,2);
988
989 } else if (!gridflag) {
990 // Coulombic cutoff is set by user, choose grid to give requested error
991 nx_max = ny_max = nz_max = 2;
992 hx = xprd/nx_max;
993 hy = yprd/ny_max;
994 hz = zprd/nz_max;
995
996 double x_error = 2.0*accuracy;
997 double y_error = 2.0*accuracy;
998 double z_error = 2.0*accuracy;
999
1000 while (x_error > accuracy) {
1001 nx_max *= 2;
1002 hx = xprd/nx_max;
1003 x_error = estimate_1d_error(hx,xprd);
1004 }
1005
1006 while (y_error > accuracy) {
1007 ny_max *= 2;
1008 hy = yprd/ny_max;
1009 y_error = estimate_1d_error(hy,yprd);
1010 }
1011
1012 while (z_error > accuracy) {
1013 nz_max *= 2;
1014 hz = zprd/nz_max;
1015 z_error = estimate_1d_error(hz,zprd);
1016 }
1017 } else {
1018 // cutoff and grid are set by user
1019 nx_max = nx_msm_max;
1020 ny_max = ny_msm_max;
1021 nz_max = nz_msm_max;
1022 }
1023
1024 // scale grid for triclinic skew
1025
1026 if (triclinic && !gridflag) {
1027 double tmp[3];
1028 tmp[0] = nx_max/xprd;
1029 tmp[1] = ny_max/yprd;
1030 tmp[2] = nz_max/zprd;
1031 lamda2xT(&tmp[0],&tmp[0]);
1032 nx_max = static_cast<int>(tmp[0]);
1033 ny_max = static_cast<int>(tmp[1]);
1034 nz_max = static_cast<int>(tmp[2]);
1035 }
1036
1037 // boost grid size until it is factorable by 2
1038 // round up or down, depending on which is closer
1039
1040 int flag = 0;
1041 int xlevels,ylevels,zlevels;
1042
1043 while (!factorable(nx_max,flag,xlevels)) {
1044 double k = log(nx_max)/log(2.0);
1045 double r = k - floor(k);
1046 if (r > 0.5) nx_max++;
1047 else nx_max--;
1048 }
1049 while (!factorable(ny_max,flag,ylevels)) {
1050 double k = log(ny_max)/log(2.0);
1051 double r = k - floor(k);
1052 if (r > 0.5) ny_max++;
1053 else ny_max--;
1054 }
1055 while (!factorable(nz_max,flag,zlevels)) {
1056 double k = log(nz_max)/log(2.0);
1057 double r = k - floor(k);
1058 if (r > 0.5) nz_max++;
1059 else nz_max--;
1060 }
1061
1062 if (flag && gridflag && me == 0)
1063 error->warning(FLERR,
1064 "Number of MSM mesh points changed to be a multiple of 2");
1065
1066 // adjust Coulombic cutoff to give desired error (if requested)
1067
1068 if (adjust_cutoff_flag) {
1069 hx = xprd/nx_max;
1070 hy = yprd/ny_max;
1071 hz = zprd/nz_max;
1072
1073 int p = order - 1;
1074 double Lx2 = xprd*xprd;
1075 double Ly2 = yprd*yprd;
1076 double Lz2 = zprd*zprd;
1077 double hx2pm2 = pow(hx,2.0*p-2.0);
1078 double hy2pm2 = pow(hy,2.0*p-2.0);
1079 double hz2pm2 = pow(hz,2.0*p-2.0);
1080 estimate_1d_error(1.0,1.0); // make sure that C_p is defined
1081 double k = q2*C_p/accuracy/sqrt(double(atom->natoms));
1082 double sum = hx2pm2/Lx2 + hy2pm2/Ly2 + hz2pm2/Lz2;
1083
1084 cutoff = pow(k*k*sum/3.0,1.0/(2.0*p));
1085 int itmp;
1086 double *p_cutoff = (double *) force->pair->extract("cut_coul",itmp);
1087 *p_cutoff = cutoff;
1088
1089 if (me == 0)
1090 error->warning(FLERR,"Adjusting Coulombic cutoff for "
1091 "MSM, new cutoff = {:.8}", cutoff);
1092 }
1093
1094 if (triclinic == 0) {
1095 h_x = xprd/nx_max;
1096 h_y = yprd/ny_max;
1097 h_z = zprd/nz_max;
1098 } else {
1099 double tmp[3];
1100 tmp[0] = nx_max;
1101 tmp[1] = ny_max;
1102 tmp[2] = nz_max;
1103 x2lamdaT(&tmp[0],&tmp[0]);
1104 h_x = 1.0/tmp[0];
1105 h_y = 1.0/tmp[1];
1106 h_z = 1.0/tmp[2];
1107 }
1108
1109 // find maximum number of levels
1110
1111 levels = MAX(xlevels,ylevels);
1112 levels = MAX(levels,zlevels);
1113
1114 if (levels > MAX_LEVELS) error->all(FLERR,"Too many MSM grid levels");
1115
1116 // need at least 2 MSM levels for periodic systems
1117
1118 if (levels <= 1) {
1119 levels = xlevels = ylevels = zlevels = 2;
1120 nx_max = ny_max = nz_max = 2;
1121 if (gridflag)
1122 error->warning(FLERR,
1123 "MSM mesh too small, increasing to 2 points in each direction");
1124 }
1125
1126 // omit top grid level for periodic systems
1127
1128 if (!domain->nonperiodic) levels -= 1;
1129
1130 deallocate_levels();
1131 allocate_levels();
1132
1133 // find number of grid levels in each direction
1134
1135 for (int n = 0; n < levels; n++) {
1136
1137 if (xlevels-n-1 > 0)
1138 nx_msm[n] = static_cast<int> (pow(2.0,xlevels-n-1));
1139 else
1140 nx_msm[n] = 1;
1141
1142 if (ylevels-n-1 > 0)
1143 ny_msm[n] = static_cast<int> (pow(2.0,ylevels-n-1));
1144 else
1145 ny_msm[n] = 1;
1146
1147 if (zlevels-n-1 > 0)
1148 nz_msm[n] = static_cast<int> (pow(2.0,zlevels-n-1));
1149 else
1150 nz_msm[n] = 1;
1151 }
1152
1153 if (nx_msm[0] >= OFFSET || ny_msm[0] >= OFFSET || nz_msm[0] >= OFFSET)
1154 error->all(FLERR,"MSM grid is too large");
1155
1156 // compute number of extra grid points needed for non-periodic boundary conditions
1157
1158 if (domain->nonperiodic) {
1159 alpha[0] = -(order/2 - 1);
1160 betax[0] = nx_msm[0] + (order/2 - 1);
1161 betay[0] = ny_msm[0] + (order/2 - 1);
1162 betaz[0] = nz_msm[0] + (order/2 - 1);
1163 for (int n = 1; n < levels; n++) {
1164 alpha[n] = -((-alpha[n-1]+1)/2) - (order/2 - 1);
1165 betax[n] = ((betax[n-1]+1)/2) + (order/2 - 1);
1166 betay[n] = ((betay[n-1]+1)/2) + (order/2 - 1);
1167 betaz[n] = ((betaz[n-1]+1)/2) + (order/2 - 1);
1168 }
1169 }
1170
1171 if (domain->nonperiodic) {
1172 alpha[0] = -(order/2 - 1);
1173 betax[0] = nx_msm[0] + (order/2 - 1);
1174 betay[0] = ny_msm[0] + (order/2 - 1);
1175 betaz[0] = nz_msm[0] + (order/2 - 1);
1176 for (int n = 1; n < levels; n++) {
1177 alpha[n] = -((-alpha[n-1]+1)/2) - (order/2 - 1);
1178 betax[n] = ((betax[n-1]+1)/2) + (order/2 - 1);
1179 betay[n] = ((betay[n-1]+1)/2) + (order/2 - 1);
1180 betaz[n] = ((betaz[n-1]+1)/2) + (order/2 - 1);
1181 }
1182 }
1183
1184 }
1185
1186 /* ----------------------------------------------------------------------
1187 set local subset of MSM grid that I own
1188 n xyz lo/hi in = 3d grid that I own (inclusive)
1189 n xyz lo/hi out = 3d grid + ghost cells in 6 directions (inclusive)
1190 ------------------------------------------------------------------------- */
1191
set_grid_local()1192 void MSM::set_grid_local()
1193 {
1194 // loop over grid levels
1195
1196 for (int n=0; n<levels; n++) {
1197
1198 // partition global grid across procs
1199 // n xyz lo/hi in[] = lower/upper bounds of global grid this proc owns
1200 // indices range from 0 to N-1 inclusive in each dim
1201
1202 comm->partition_grid(nx_msm[n],ny_msm[n],nz_msm[n],0.0,
1203 nxlo_in[n],nxhi_in[n],nylo_in[n],nyhi_in[n],
1204 nzlo_in[n],nzhi_in[n]);
1205
1206 nlower = -(order-1)/2;
1207 nupper = order/2;
1208
1209 // lengths of box and processor sub-domains
1210
1211 double *prd,*sublo,*subhi;
1212
1213 if (!triclinic) {
1214 prd = domain->prd;
1215 sublo = domain->sublo;
1216 subhi = domain->subhi;
1217 } else {
1218 prd = domain->prd_lamda;
1219 sublo = domain->sublo_lamda;
1220 subhi = domain->subhi_lamda;
1221 }
1222
1223 double xprd = prd[0];
1224 double yprd = prd[1];
1225 double zprd = prd[2];
1226
1227 // shift values for particle <-> grid mapping
1228 // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
1229
1230 // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
1231 // global MSM grid that my particles can contribute charge to
1232 // effectively nlo_in,nhi_in + ghost cells
1233 // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
1234 // position a particle in my box can be at
1235 // dist[3] = particle position bound = subbox + skin/2.0
1236 // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
1237
1238 double dist[3];
1239 double cuthalf = 0.0;
1240 if (n == 0) cuthalf = 0.5*neighbor->skin; // only applies to finest grid
1241 dist[0] = dist[1] = dist[2] = cuthalf;
1242 if (triclinic) kspacebbox(cuthalf,&dist[0]);
1243
1244 int nlo,nhi;
1245
1246 nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
1247 nx_msm[n]/xprd + OFFSET) - OFFSET;
1248 nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
1249 nx_msm[n]/xprd + OFFSET) - OFFSET;
1250 if (n == 0) {
1251 // use a smaller ghost region for interpolation
1252 nxlo_out_all = nlo + nlower;
1253 nxhi_out_all = nhi + nupper;
1254 }
1255 // a larger ghost region is needed for the direct sum and for restriction/prolongation
1256 nxlo_out[n] = nlo + MIN(-order,nxlo_direct);
1257 nxhi_out[n] = nhi + MAX(order,nxhi_direct);
1258
1259 nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
1260 ny_msm[n]/yprd + OFFSET) - OFFSET;
1261 nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
1262 ny_msm[n]/yprd + OFFSET) - OFFSET;
1263 if (n == 0) {
1264 nylo_out_all = nlo + nlower;
1265 nyhi_out_all = nhi + nupper;
1266 }
1267 nylo_out[n] = nlo + MIN(-order,nylo_direct);
1268 nyhi_out[n] = nhi + MAX(order,nyhi_direct);
1269
1270 nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
1271 nz_msm[n]/zprd + OFFSET) - OFFSET;
1272 nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
1273 nz_msm[n]/zprd + OFFSET) - OFFSET;
1274 if (n == 0) {
1275 nzlo_out_all = nlo + nlower;
1276 nzhi_out_all = nhi + nupper;
1277 }
1278 // a hemisphere is used for direct sum interactions,
1279 // so no ghosting is needed for direct sum in the -z direction
1280 nzlo_out[n] = nlo - order;
1281 nzhi_out[n] = nhi + MAX(order,nzhi_direct);
1282
1283 // add extra grid points for non-periodic boundary conditions
1284 // skip reset of lo/hi for procs who do not own any grid cells
1285
1286 if (domain->nonperiodic) {
1287
1288 if (!domain->xperiodic && nxlo_in[n] <= nxhi_in[n]) {
1289 if (nxlo_in[n] == 0) nxlo_in[n] = alpha[n];
1290 nxlo_out[n] = MAX(nxlo_out[n],alpha[n]);
1291 if (n == 0) nxlo_out_all = MAX(nxlo_out_all,alpha[0]);
1292
1293 if (nxhi_in[n] == nx_msm[n] - 1) nxhi_in[n] = betax[n];
1294 nxhi_out[n] = MIN(nxhi_out[n],betax[n]);
1295 if (n == 0) nxhi_out_all = MIN(nxhi_out_all,betax[0]);
1296 }
1297
1298 if (!domain->yperiodic && nylo_in[n] <= nyhi_in[n]) {
1299 if (nylo_in[n] == 0) nylo_in[n] = alpha[n];
1300 nylo_out[n] = MAX(nylo_out[n],alpha[n]);
1301 if (n == 0) nylo_out_all = MAX(nylo_out_all,alpha[0]);
1302
1303 if (nyhi_in[n] == ny_msm[n] - 1) nyhi_in[n] = betay[n];
1304 nyhi_out[n] = MIN(nyhi_out[n],betay[n]);
1305 if (n == 0) nyhi_out_all = MIN(nyhi_out_all,betay[0]);
1306 }
1307
1308 if (!domain->zperiodic && nzlo_in[n] <= nzhi_in[n]) {
1309 if (nzlo_in[n] == 0) nzlo_in[n] = alpha[n];
1310 nzlo_out[n] = MAX(nzlo_out[n],alpha[n]);
1311 if (n == 0) nzlo_out_all = MAX(nzlo_out_all,alpha[0]);
1312
1313 if (nzhi_in[n] == nz_msm[n] - 1) nzhi_in[n] = betaz[n];
1314 nzhi_out[n] = MIN(nzhi_out[n],betaz[n]);
1315 if (n == 0) nzhi_out_all = MIN(nzhi_out_all,betaz[0]);
1316 }
1317 }
1318
1319 // prevent inactive processors from participating in MPI communication routines
1320
1321 set_proc_grid(n);
1322
1323 // MSM grids for this proc, including ghosts
1324
1325 ngrid[n] = (nxhi_out[n]-nxlo_out[n]+1) * (nyhi_out[n]-nylo_out[n]+1) *
1326 (nzhi_out[n]-nzlo_out[n]+1);
1327 }
1328 }
1329
1330 /* ----------------------------------------------------------------------
1331 find active procs and neighbor procs for each grid level
1332 ------------------------------------------------------------------------- */
1333
set_proc_grid(int n)1334 void MSM::set_proc_grid(int n)
1335 {
1336 for (int i=0; i<3; i++)
1337 myloc[i] = comm->myloc[i];
1338
1339 // size of inner MSM grid owned by this proc
1340
1341 int nxgrid_in = nxhi_in[n]-nxlo_in[n]+1;
1342 int nygrid_in = nyhi_in[n]-nylo_in[n]+1;
1343 int nzgrid_in = nzhi_in[n]-nzlo_in[n]+1;
1344 int ngrid_in = nxgrid_in * nygrid_in * nzgrid_in;
1345
1346 // check to see if this proc owns any inner grid points on this grid level
1347 // if not, deactivate by setting active_flag = 0
1348
1349 int cnt[3];
1350
1351 cnt[0] = 0;
1352 if (myloc[1] == 0 && myloc[2] == 0)
1353 if (nxgrid_in > 0)
1354 cnt[0] = 1;
1355
1356 cnt[1] = 0;
1357 if (myloc[0] == 0 && myloc[2] == 0)
1358 if (nygrid_in > 0)
1359 cnt[1] = 1;
1360
1361 cnt[2] = 0;
1362 if (myloc[0] == 0 && myloc[1] == 0)
1363 if (nzgrid_in > 0)
1364 cnt[2] = 1;
1365
1366 MPI_Allreduce(&cnt[0],&procgrid[0],3,MPI_INT,MPI_SUM,world);
1367
1368 int color;
1369
1370 if (ngrid_in > 0) {
1371 active_flag[n] = 1;
1372 color = 0;
1373 } else {
1374 active_flag[n] = 0;
1375 color = MPI_UNDEFINED;
1376 }
1377
1378 // define a new MPI communicator for this grid level that only includes active procs
1379
1380 if (world_levels[n] != MPI_COMM_NULL) MPI_Comm_free(&world_levels[n]);
1381 MPI_Comm_split(world,color,me,&world_levels[n]);
1382
1383 if (!active_flag[n]) return;
1384
1385 int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
1386
1387 // map processor IDs to new 3d processor grid
1388 // produces myloc, procneigh
1389
1390 int periods[3];
1391 periods[0] = periods[1] = periods[2] = 1;
1392 MPI_Comm cartesian;
1393
1394 MPI_Cart_create(world_levels[n],3,procgrid,periods,0,&cartesian);
1395 MPI_Cart_get(cartesian,3,procgrid,periods,myloc);
1396 MPI_Cart_shift(cartesian,0,1,&procneigh[0][0],&procneigh[0][1]);
1397 MPI_Cart_shift(cartesian,1,1,&procneigh[1][0],&procneigh[1][1]);
1398 MPI_Cart_shift(cartesian,2,1,&procneigh[2][0],&procneigh[2][1]);
1399
1400 MPI_Comm_free(&cartesian);
1401
1402 for (int i=0; i<3; i++)
1403 for (int j=0; j<2; j++)
1404 procneigh_levels[n][i][j] = procneigh[i][j];
1405 }
1406
1407 /* ----------------------------------------------------------------------
1408 reset local grid arrays and communication stencils
1409 called by fix balance b/c it changed sizes of processor sub-domains
1410 ------------------------------------------------------------------------- */
1411
setup_grid()1412 void MSM::setup_grid()
1413 {
1414 // free all arrays previously allocated
1415 // pre-compute volume-dependent coeffs
1416 // reset portion of global grid that each proc owns
1417 // reallocate MSM long-range dependent memory
1418 // don't invoke allocate_peratom(), compute() will allocate when needed
1419
1420 setup();
1421 }
1422
1423 /* ----------------------------------------------------------------------
1424 check if all factors of n are in list of factors
1425 return 1 if yes, 0 if no
1426 ------------------------------------------------------------------------- */
1427
factorable(int n,int & flag,int & nlevels)1428 int MSM::factorable(int n, int &flag, int &nlevels)
1429 {
1430 int i;
1431 nlevels = 1;
1432
1433 while (n > 1) {
1434 for (i = 0; i < nfactors; i++) {
1435 if (n % factors[i] == 0) {
1436 n /= factors[i];
1437 nlevels++;
1438 break;
1439 }
1440 }
1441 if (i == nfactors) {
1442 flag = 1;
1443 return 0;
1444 }
1445 }
1446
1447 return 1;
1448 }
1449
1450 /* ----------------------------------------------------------------------
1451 find center grid pt for each of my particles
1452 check that full stencil for the particle will fit in my 3d brick
1453 store central grid pt indices in part2grid array
1454 ------------------------------------------------------------------------- */
1455
particle_map()1456 void MSM::particle_map()
1457 {
1458 int nx,ny,nz;
1459
1460 double **x = atom->x;
1461 int nlocal = atom->nlocal;
1462
1463 int flag = 0;
1464
1465 if (!std::isfinite(boxlo[0]) || !std::isfinite(boxlo[1]) || !std::isfinite(boxlo[2]))
1466 error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
1467
1468 for (int i = 0; i < nlocal; i++) {
1469
1470 // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
1471 // current particle coord can be outside global and local box
1472 // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
1473
1474 nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv[0]+OFFSET) - OFFSET;
1475 ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv[0]+OFFSET) - OFFSET;
1476 nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv[0]+OFFSET) - OFFSET;
1477
1478 part2grid[i][0] = nx;
1479 part2grid[i][1] = ny;
1480 part2grid[i][2] = nz;
1481
1482 // check that entire stencil around nx,ny,nz will fit in my 3d brick
1483
1484 if (nx+nlower < nxlo_out[0] || nx+nupper > nxhi_out[0] ||
1485 ny+nlower < nylo_out[0] || ny+nupper > nyhi_out[0] ||
1486 nz+nlower < nzlo_out[0] || nz+nupper > nzhi_out[0]) flag = 1;
1487 }
1488
1489 if (flag) error->one(FLERR,"Out of range atoms - cannot compute MSM");
1490 }
1491
1492 /* ----------------------------------------------------------------------
1493 aninterpolation: interpolate charges from particles to grid
1494 ------------------------------------------------------------------------- */
1495
make_rho()1496 void MSM::make_rho()
1497 {
1498 int i,l,m,n,nx,ny,nz,mx,my,mz;
1499 double dx,dy,dz,x0,y0,z0;
1500
1501 // clear 3d density array
1502
1503 double ***qgrid0 = qgrid[0];
1504 memset(&(qgrid0[nzlo_out[0]][nylo_out[0]][nxlo_out[0]]),0,ngrid[0]*sizeof(double));
1505
1506 // loop over my charges, add their contribution to nearby grid points
1507 // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
1508 // (dx,dy,dz) = distance to "lower left" grid pt
1509 // (mx,my,mz) = global coords of moving stencil pt
1510
1511 double *q = atom->q;
1512 double **x = atom->x;
1513 int nlocal = atom->nlocal;
1514
1515 for (i = 0; i < nlocal; i++) {
1516
1517 nx = part2grid[i][0];
1518 ny = part2grid[i][1];
1519 nz = part2grid[i][2];
1520 dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
1521 dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
1522 dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
1523
1524 compute_phis(dx,dy,dz);
1525
1526 z0 = q[i];
1527 for (n = nlower; n <= nupper; n++) {
1528 mz = n+nz;
1529 y0 = z0*phi1d[2][n];
1530 for (m = nlower; m <= nupper; m++) {
1531 my = m+ny;
1532 x0 = y0*phi1d[1][m];
1533 for (l = nlower; l <= nupper; l++) {
1534 mx = l+nx;
1535 qgrid0[mz][my][mx] += x0*phi1d[0][l];
1536 }
1537 }
1538 }
1539 }
1540
1541 }
1542
1543 /* ----------------------------------------------------------------------
1544 MSM direct sum procedure for intermediate grid levels, solve Poisson's
1545 equation to get energy, virial, etc.
1546 ------------------------------------------------------------------------- */
1547
direct(int n)1548 void MSM::direct(int n)
1549 {
1550 double ***qgridn = qgrid[n];
1551 double ***egridn = egrid[n];
1552 double ***v0gridn = v0grid[n];
1553 double ***v1gridn = v1grid[n];
1554 double ***v2gridn = v2grid[n];
1555 double ***v3gridn = v3grid[n];
1556 double ***v4gridn = v4grid[n];
1557 double ***v5gridn = v5grid[n];
1558 double *g_directn = g_direct[n];
1559 double *v0_directn = v0_direct[n];
1560 double *v1_directn = v1_direct[n];
1561 double *v2_directn = v2_direct[n];
1562 double *v3_directn = v3_direct[n];
1563 double *v4_directn = v4_direct[n];
1564 double *v5_directn = v5_direct[n];
1565
1566 // zero out electric potential
1567
1568 memset(&(egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1569
1570 // zero out virial
1571
1572 if (vflag_atom) {
1573 memset(&(v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1574 memset(&(v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1575 memset(&(v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1576 memset(&(v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1577 memset(&(v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1578 memset(&(v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1579 }
1580
1581 int icx,icy,icz,ix,iy,iz,zk,zyk,k;
1582 int ii,jj,kk;
1583 int imin,imax,jmin,jmax,kmax;
1584 double qtmp,qtmp2,gtmp;
1585 double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
1586 double **qk,**ek;
1587 double *qkj,*ekj;
1588
1589 int nx = nxhi_direct - nxlo_direct + 1;
1590 int ny = nyhi_direct - nylo_direct + 1;
1591
1592 // loop over inner grid points
1593
1594 for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
1595
1596 if (domain->zperiodic) {
1597 kmax = nzhi_direct;
1598 } else {
1599 kmax = MIN(nzhi_direct,betaz[n] - icz);
1600 }
1601
1602 for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
1603
1604 if (domain->yperiodic) {
1605 jmin = nylo_direct;
1606 jmax = nyhi_direct;
1607 } else {
1608 jmin = MAX(nylo_direct,alpha[n] - icy);
1609 jmax = MIN(nyhi_direct,betay[n] - icy);
1610 }
1611
1612 for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
1613
1614 if (domain->xperiodic) {
1615 imin = nxlo_direct;
1616 imax = nxhi_direct;
1617 } else {
1618 imin = MAX(nxlo_direct,alpha[n] - icx);
1619 imax = MIN(nxhi_direct,betax[n] - icx);
1620 }
1621
1622 qtmp = qgridn[icz][icy][icx]; // charge on center grid point
1623
1624 esum = 0.0;
1625 if (vflag_either && !scalar_pressure_flag)
1626 v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0;
1627
1628 // use hemisphere to avoid double computation of pair-wise
1629 // interactions in direct sum (no computations in -z direction)
1630
1631 for (iz = 1; iz <= kmax; iz++) {
1632 kk = icz+iz;
1633 qk = qgridn[kk];
1634 ek = egridn[kk];
1635 zk = (iz + nzhi_direct)*ny;
1636 for (iy = jmin; iy <= jmax; iy++) {
1637 jj = icy+iy;
1638 qkj = qk[jj];
1639 ekj = ek[jj];
1640 zyk = (zk + iy + nyhi_direct)*nx;
1641 for (ix = imin; ix <= imax; ix++) {
1642 ii = icx+ix;
1643 qtmp2 = qkj[ii]; // charge on outer grid point
1644 k = zyk + ix + nxhi_direct;
1645 gtmp = g_directn[k];
1646 esum += gtmp * qtmp2;
1647 ekj[ii] += gtmp * qtmp;
1648
1649 if (vflag_either && !scalar_pressure_flag) {
1650 v0sum += v0_directn[k] * qtmp2;
1651 v1sum += v1_directn[k] * qtmp2;
1652 v2sum += v2_directn[k] * qtmp2;
1653 v3sum += v3_directn[k] * qtmp2;
1654 v4sum += v4_directn[k] * qtmp2;
1655 v5sum += v5_directn[k] * qtmp2;
1656 }
1657 }
1658 }
1659 }
1660
1661 // iz=0
1662
1663 iz = 0;
1664 kk = icz+iz;
1665 qk = qgridn[kk];
1666 ek = egridn[kk];
1667 zk = (iz + nzhi_direct)*ny;
1668 for (iy = 1; iy <= jmax; iy++) {
1669 jj = icy+iy;
1670 qkj = qk[jj];
1671 ekj = ek[jj];
1672 zyk = (zk + iy + nyhi_direct)*nx;
1673 for (ix = imin; ix <= imax; ix++) {
1674 ii = icx+ix;
1675 qtmp2 = qkj[ii];
1676 k = zyk + ix + nxhi_direct;
1677 gtmp = g_directn[k];
1678 esum += gtmp * qtmp2;
1679 ekj[ii] += gtmp * qtmp;
1680
1681 if (vflag_either && !scalar_pressure_flag) {
1682 v0sum += v0_directn[k] * qtmp2;
1683 v1sum += v1_directn[k] * qtmp2;
1684 v2sum += v2_directn[k] * qtmp2;
1685 v3sum += v3_directn[k] * qtmp2;
1686 v4sum += v4_directn[k] * qtmp2;
1687 v5sum += v5_directn[k] * qtmp2;
1688 }
1689 }
1690 }
1691
1692 // iz=0, iy=0
1693
1694 iz = 0;
1695 kk = icz+iz;
1696 qk = qgridn[kk];
1697 ek = egridn[kk];
1698 zk = (iz + nzhi_direct)*ny;
1699 iy = 0;
1700 jj = icy+iy;
1701 qkj = qk[jj];
1702 ekj = ek[jj];
1703 zyk = (zk + iy + nyhi_direct)*nx;
1704 for (ix = 1; ix <= imax; ix++) {
1705 ii = icx+ix;
1706 qtmp2 = qkj[ii];
1707 k = zyk + ix + nxhi_direct;
1708 gtmp = g_directn[k];
1709 esum += gtmp * qtmp2;
1710 ekj[ii] += gtmp * qtmp;
1711
1712 if (vflag_either && !scalar_pressure_flag) {
1713 v0sum += v0_directn[k] * qtmp2;
1714 v1sum += v1_directn[k] * qtmp2;
1715 v2sum += v2_directn[k] * qtmp2;
1716 v3sum += v3_directn[k] * qtmp2;
1717 v4sum += v4_directn[k] * qtmp2;
1718 v5sum += v5_directn[k] * qtmp2;
1719 }
1720 }
1721
1722 // iz=0, iy=0, ix=0
1723
1724 iz = 0;
1725 zk = (iz + nzhi_direct)*ny;
1726 iy = 0;
1727 zyk = (zk + iy + nyhi_direct)*nx;
1728 ix = 0;
1729 k = zyk + ix + nxhi_direct;
1730 gtmp = g_directn[k];
1731 esum += 0.5 * gtmp * qtmp;
1732 egridn[icz][icy][icx] += 0.5 * gtmp * qtmp;
1733
1734 // virial is zero for iz=0, iy=0, ix=0
1735
1736 // accumulate per-atom energy/virial
1737
1738 egridn[icz][icy][icx] += esum;
1739
1740 if (vflag_atom && !scalar_pressure_flag) {
1741 v0gridn[icz][icy][icx] += v0sum;
1742 v1gridn[icz][icy][icx] += v1sum;
1743 v2gridn[icz][icy][icx] += v2sum;
1744 v3gridn[icz][icy][icx] += v3sum;
1745 v4gridn[icz][icy][icx] += v4sum;
1746 v5gridn[icz][icy][icx] += v5sum;
1747 }
1748
1749 // accumulate total energy/virial
1750
1751 if (evflag) {
1752 qtmp = qgridn[icz][icy][icx];
1753 if (eflag_global) energy += 2.0 * esum * qtmp;
1754 if (vflag_global && !scalar_pressure_flag) {
1755 virial[0] += 2.0 * v0sum * qtmp;
1756 virial[1] += 2.0 * v1sum * qtmp;
1757 virial[2] += 2.0 * v2sum * qtmp;
1758 virial[3] += 2.0 * v3sum * qtmp;
1759 virial[4] += 2.0 * v4sum * qtmp;
1760 virial[5] += 2.0 * v5sum * qtmp;
1761 }
1762 }
1763
1764 }
1765 }
1766 }
1767
1768 // compute per-atom virial (if requested)
1769
1770 if (vflag_atom)
1771 direct_peratom(n);
1772 }
1773
1774 /* ----------------------------------------------------------------------
1775 MSM direct sum procedure for intermediate grid levels, solve Poisson's
1776 equation to get per-atom virial, separate method used for performance
1777 reasons
1778 ------------------------------------------------------------------------- */
1779
direct_peratom(int n)1780 void MSM::direct_peratom(int n)
1781 {
1782 double ***qgridn = qgrid[n];
1783 double ***v0gridn = v0grid[n];
1784 double ***v1gridn = v1grid[n];
1785 double ***v2gridn = v2grid[n];
1786 double ***v3gridn = v3grid[n];
1787 double ***v4gridn = v4grid[n];
1788 double ***v5gridn = v5grid[n];
1789
1790 int icx,icy,icz,ix,iy,iz,zk,zyk,k;
1791 int ii,jj,kk;
1792 int imin,imax,jmin,jmax,kmax;
1793 double qtmp;
1794
1795 int nx = nxhi_direct - nxlo_direct + 1;
1796 int ny = nyhi_direct - nylo_direct + 1;
1797
1798 // loop over inner grid points
1799
1800 for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
1801
1802 if (domain->zperiodic) {
1803 kmax = nzhi_direct;
1804 } else {
1805 kmax = MIN(nzhi_direct,betaz[n] - icz);
1806 }
1807
1808 for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
1809
1810 if (domain->yperiodic) {
1811 jmin = nylo_direct;
1812 jmax = nyhi_direct;
1813 } else {
1814 jmin = MAX(nylo_direct,alpha[n] - icy);
1815 jmax = MIN(nyhi_direct,betay[n] - icy);
1816 }
1817
1818 for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
1819
1820 if (domain->xperiodic) {
1821 imin = nxlo_direct;
1822 imax = nxhi_direct;
1823 } else {
1824 imin = MAX(nxlo_direct,alpha[n] - icx);
1825 imax = MIN(nxhi_direct,betax[n] - icx);
1826 }
1827
1828 qtmp = qgridn[icz][icy][icx]; // center grid point
1829
1830 // use hemisphere to avoid double computation of pair-wise
1831 // interactions in direct sum (no computations in -z direction)
1832
1833 for (iz = 1; iz <= kmax; iz++) {
1834 kk = icz+iz;
1835 zk = (iz + nzhi_direct)*ny;
1836 for (iy = jmin; iy <= jmax; iy++) {
1837 jj = icy+iy;
1838 zyk = (zk + iy + nyhi_direct)*nx;
1839 for (ix = imin; ix <= imax; ix++) {
1840 ii = icx+ix;
1841 k = zyk + ix + nxhi_direct;
1842 v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp;
1843 v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp;
1844 v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp;
1845 v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp;
1846 v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp;
1847 v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp;
1848 }
1849 }
1850 }
1851
1852 // iz=0
1853
1854 iz = 0;
1855 kk = icz+iz;
1856 zk = (iz + nzhi_direct)*ny;
1857 for (iy = 1; iy <= jmax; iy++) {
1858 jj = icy+iy;
1859 zyk = (zk + iy + nyhi_direct)*nx;
1860 for (ix = imin; ix <= imax; ix++) {
1861 ii = icx+ix;
1862 k = zyk + ix + nxhi_direct;
1863 v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp;
1864 v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp;
1865 v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp;
1866 v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp;
1867 v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp;
1868 v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp;
1869 }
1870 }
1871
1872 // iz=0, iy=0
1873
1874 iz = 0;
1875 kk = icz+iz;
1876 zk = (iz + nzhi_direct)*ny;
1877 iy = 0;
1878 jj = icy+iy;
1879 zyk = (zk + iy + nyhi_direct)*nx;
1880 for (ix = 1; ix <= imax; ix++) {
1881 ii = icx+ix;
1882 k = zyk + ix + nxhi_direct;
1883 v0gridn[kk][jj][ii] += v0_direct[n][k] * qtmp;
1884 v1gridn[kk][jj][ii] += v1_direct[n][k] * qtmp;
1885 v2gridn[kk][jj][ii] += v2_direct[n][k] * qtmp;
1886 v3gridn[kk][jj][ii] += v3_direct[n][k] * qtmp;
1887 v4gridn[kk][jj][ii] += v4_direct[n][k] * qtmp;
1888 v5gridn[kk][jj][ii] += v5_direct[n][k] * qtmp;
1889 }
1890
1891 // virial is zero for iz=0, iy=0, ix=0
1892
1893 }
1894 }
1895 }
1896 }
1897
1898 /* ----------------------------------------------------------------------
1899 MSM direct sum procedure for top grid level (nonperiodic systems only)
1900 ------------------------------------------------------------------------- */
1901
direct_top(int n)1902 void MSM::direct_top(int n)
1903 {
1904 double ***qgridn = qgrid[n];
1905 double ***egridn = egrid[n];
1906 double ***v0gridn = v0grid[n];
1907 double ***v1gridn = v1grid[n];
1908 double ***v2gridn = v2grid[n];
1909 double ***v3gridn = v3grid[n];
1910 double ***v4gridn = v4grid[n];
1911 double ***v5gridn = v5grid[n];
1912
1913 // zero out electric potential
1914
1915 memset(&(egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1916
1917 // zero out virial
1918
1919 if (vflag_atom) {
1920 memset(&(v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1921 memset(&(v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1922 memset(&(v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1923 memset(&(v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1924 memset(&(v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1925 memset(&(v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]]),0,ngrid[n]*sizeof(double));
1926 }
1927
1928 int icx,icy,icz,ix,iy,iz,zk,zyk,k;
1929 int ii,jj,kk;
1930 int imin,imax,jmin,jmax,kmax;
1931 double qtmp,qtmp2,gtmp;
1932 double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
1933 double **qk,**ek;
1934 double *qkj,*ekj;
1935
1936 int nx_top = betax[n] - alpha[n];
1937 int ny_top = betay[n] - alpha[n];
1938 int nz_top = betaz[n] - alpha[n];
1939
1940 int nx = 2*nx_top + 1;
1941 int ny = 2*ny_top + 1;
1942
1943 // loop over inner grid points
1944
1945 for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
1946
1947 if (domain->zperiodic) {
1948 kmax = nz_msm[n]-1;
1949 } else {
1950 kmax = betaz[n] - icz;
1951 }
1952
1953 for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
1954
1955 if (domain->yperiodic) {
1956 jmin = 0;
1957 jmax = ny_msm[n]-1;
1958 } else {
1959 jmin = alpha[n] - icy;
1960 jmax = betay[n] - icy;
1961 }
1962
1963 for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
1964
1965 if (domain->xperiodic) {
1966 imin = 0;
1967 imax = nx_msm[n]-1;
1968 } else {
1969 imin = alpha[n] - icx;
1970 imax = betax[n] - icx;
1971 }
1972
1973 qtmp = qgridn[icz][icy][icx];
1974
1975 esum = 0.0;
1976 if (vflag_either && !scalar_pressure_flag)
1977 v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0;
1978
1979 // use hemisphere to avoid double computation of pair-wise
1980 // interactions in direct sum (no computations in -z direction)
1981
1982 for (iz = 1; iz <= kmax; iz++) {
1983 kk = icz+iz;
1984 qk = qgridn[kk];
1985 ek = egridn[kk];
1986 zk = (iz + nz_top)*ny;
1987 for (iy = jmin; iy <= jmax; iy++) {
1988 jj = icy+iy;
1989 qkj = qk[jj];
1990 ekj = ek[jj];
1991 zyk = (zk + iy + ny_top)*nx;
1992 for (ix = imin; ix <= imax; ix++) {
1993 ii = icx+ix;
1994 qtmp2 = qkj[ii];
1995 k = zyk + ix + nx_top;
1996 gtmp = g_direct_top[k];
1997 esum += gtmp * qtmp2;
1998 ekj[ii] += gtmp * qtmp;
1999
2000 if (vflag_either && !scalar_pressure_flag) {
2001 v0sum += v0_direct_top[k] * qtmp2;
2002 v1sum += v1_direct_top[k] * qtmp2;
2003 v2sum += v2_direct_top[k] * qtmp2;
2004 v3sum += v3_direct_top[k] * qtmp2;
2005 v4sum += v4_direct_top[k] * qtmp2;
2006 v5sum += v5_direct_top[k] * qtmp2;
2007 }
2008 }
2009 }
2010 }
2011
2012 // iz=0
2013
2014 iz = 0;
2015 kk = icz+iz;
2016 qk = qgridn[kk];
2017 ek = egridn[kk];
2018 zk = (iz + nz_top)*ny;
2019 for (iy = 1; iy <= jmax; iy++) {
2020 jj = icy+iy;
2021 qkj = qk[jj];
2022 ekj = ek[jj];
2023 zyk = (zk + iy + ny_top)*nx;
2024 for (ix = imin; ix <= imax; ix++) {
2025 ii = icx+ix;
2026 qtmp2 = qkj[ii];
2027 k = zyk + ix + nx_top;
2028 gtmp = g_direct_top[k];
2029 esum += gtmp * qtmp2;
2030 ekj[ii] += gtmp * qtmp;
2031
2032 if (vflag_either && !scalar_pressure_flag) {
2033 v0sum += v0_direct_top[k] * qtmp2;
2034 v1sum += v1_direct_top[k] * qtmp2;
2035 v2sum += v2_direct_top[k] * qtmp2;
2036 v3sum += v3_direct_top[k] * qtmp2;
2037 v4sum += v4_direct_top[k] * qtmp2;
2038 v5sum += v5_direct_top[k] * qtmp2;
2039 }
2040 }
2041 }
2042
2043 // iz=0, iy=0
2044
2045 iz = 0;
2046 kk = icz+iz;
2047 qk = qgridn[kk];
2048 ek = egridn[kk];
2049 zk = (iz + nz_top)*ny;
2050 iy = 0;
2051 jj = icy+iy;
2052 qkj = qk[jj];
2053 ekj = ek[jj];
2054 zyk = (zk + iy + ny_top)*nx;
2055 for (ix = 1; ix <= imax; ix++) {
2056 ii = icx+ix;
2057 qtmp2 = qkj[ii];
2058 k = zyk + ix + nx_top;
2059 gtmp = g_direct_top[k];
2060 esum += gtmp * qtmp2;
2061 ekj[ii] += gtmp * qtmp;
2062
2063 if (vflag_either && !scalar_pressure_flag) {
2064 v0sum += v0_direct_top[k] * qtmp2;
2065 v1sum += v1_direct_top[k] * qtmp2;
2066 v2sum += v2_direct_top[k] * qtmp2;
2067 v3sum += v3_direct_top[k] * qtmp2;
2068 v4sum += v4_direct_top[k] * qtmp2;
2069 v5sum += v5_direct_top[k] * qtmp2;
2070 }
2071 }
2072
2073 // iz=0, iy=0, ix=0
2074
2075 iz = 0;
2076 zk = (iz + nz_top)*ny;
2077 iy = 0;
2078 zyk = (zk + iy + ny_top)*nx;
2079 ix = 0;
2080 ii = icx+ix;
2081 k = zyk + ix + nx_top;
2082 gtmp = g_direct_top[k];
2083 esum += 0.5 * gtmp * qtmp;
2084 egridn[icz][icy][icx] += 0.5 * gtmp * qtmp;
2085
2086 if (vflag_either && !scalar_pressure_flag) {
2087 v0sum += v0_direct_top[k] * qtmp;
2088 v1sum += v1_direct_top[k] * qtmp;
2089 v2sum += v2_direct_top[k] * qtmp;
2090 v3sum += v3_direct_top[k] * qtmp;
2091 v4sum += v4_direct_top[k] * qtmp;
2092 v5sum += v5_direct_top[k] * qtmp;
2093 }
2094
2095 // accumulate per-atom energy/virial
2096
2097 egridn[icz][icy][icx] += esum;
2098
2099 if (vflag_atom && !scalar_pressure_flag) {
2100 v0gridn[icz][icy][icx] += v0sum;
2101 v1gridn[icz][icy][icx] += v1sum;
2102 v2gridn[icz][icy][icx] += v2sum;
2103 v3gridn[icz][icy][icx] += v3sum;
2104 v4gridn[icz][icy][icx] += v4sum;
2105 v5gridn[icz][icy][icx] += v5sum;
2106 }
2107
2108 // accumulate total energy/virial
2109
2110 if (evflag) {
2111 qtmp = qgridn[icz][icy][icx];
2112 if (eflag_global) energy += 2.0 * esum * qtmp;
2113 if (vflag_global && !scalar_pressure_flag) {
2114 virial[0] += 2.0 * v0sum * qtmp;
2115 virial[1] += 2.0 * v1sum * qtmp;
2116 virial[2] += 2.0 * v2sum * qtmp;
2117 virial[3] += 2.0 * v3sum * qtmp;
2118 virial[4] += 2.0 * v4sum * qtmp;
2119 virial[5] += 2.0 * v5sum * qtmp;
2120 }
2121 }
2122
2123 }
2124 }
2125 }
2126
2127 // compute per-atom virial (if requested)
2128
2129 if (vflag_atom)
2130 direct_peratom_top(n);
2131 }
2132 /* ----------------------------------------------------------------------
2133 MSM direct sum procedure for top grid level, solve Poisson's
2134 equation to get per-atom virial, separate method used for performance
2135 reasons
2136 ------------------------------------------------------------------------- */
2137
direct_peratom_top(int n)2138 void MSM::direct_peratom_top(int n)
2139 {
2140 double ***qgridn = qgrid[n];
2141 double ***v0gridn = v0grid[n];
2142 double ***v1gridn = v1grid[n];
2143 double ***v2gridn = v2grid[n];
2144 double ***v3gridn = v3grid[n];
2145 double ***v4gridn = v4grid[n];
2146 double ***v5gridn = v5grid[n];
2147
2148 int icx,icy,icz,ix,iy,iz,zk,zyk,k;
2149 int ii,jj,kk;
2150 int imin,imax,jmin,jmax,kmax;
2151 double qtmp;
2152
2153 int nx_top = betax[n] - alpha[n];
2154 int ny_top = betay[n] - alpha[n];
2155 int nz_top = betaz[n] - alpha[n];
2156
2157 int nx = 2*nx_top + 1;
2158 int ny = 2*ny_top + 1;
2159
2160 // loop over inner grid points
2161
2162 for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++) {
2163
2164 if (domain->zperiodic) {
2165 kmax = nz_msm[n]-1;
2166 } else {
2167 kmax = betaz[n] - icz;
2168 }
2169
2170 for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++) {
2171
2172 if (domain->yperiodic) {
2173 jmin = 0;
2174 jmax = ny_msm[n]-1;
2175 } else {
2176 jmin = alpha[n] - icy;
2177 jmax = betay[n] - icy;
2178 }
2179
2180 for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++) {
2181
2182 if (domain->xperiodic) {
2183 imin = 0;
2184 imax = nx_msm[n]-1;
2185 } else {
2186 imin = alpha[n] - icx;
2187 imax = betax[n] - icx;
2188 }
2189
2190 qtmp = qgridn[icz][icy][icx]; // center grid point
2191
2192 // use hemisphere to avoid double computation of pair-wise
2193 // interactions in direct sum (no computations in -z direction)
2194
2195 for (iz = 1; iz <= kmax; iz++) {
2196 kk = icz+iz;
2197 zk = (iz + nz_top)*ny;
2198 for (iy = jmin; iy <= jmax; iy++) {
2199 jj = icy+iy;
2200 zyk = (zk + iy + ny_top)*nx;
2201 for (ix = imin; ix <= imax; ix++) {
2202 ii = icx+ix;
2203 k = zyk + ix + nx_top;
2204 v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp;
2205 v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp;
2206 v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp;
2207 v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp;
2208 v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp;
2209 v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp;
2210 }
2211 }
2212 }
2213
2214 // iz=0
2215
2216 iz = 0;
2217 kk = icz+iz;
2218 zk = (iz + nz_top)*ny;
2219 for (iy = 1; iy <= jmax; iy++) {
2220 jj = icy+iy;
2221 zyk = (zk + iy + ny_top)*nx;
2222 for (ix = imin; ix <= imax; ix++) {
2223 ii = icx+ix;
2224 k = zyk + ix + nx_top;
2225 v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp;
2226 v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp;
2227 v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp;
2228 v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp;
2229 v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp;
2230 v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp;
2231 }
2232 }
2233
2234 // iz=0, iy=0
2235
2236 iz = 0;
2237 kk = icz+iz;
2238 zk = (iz + nz_top)*ny;
2239 iy = 0;
2240 jj = icy+iy;
2241 zyk = (zk + iy + ny_top)*nx;
2242 for (ix = 1; ix <= imax; ix++) {
2243 ii = icx+ix;
2244 k = zyk + ix + nx_top;
2245 v0gridn[kk][jj][ii] += v0_direct_top[k] * qtmp;
2246 v1gridn[kk][jj][ii] += v1_direct_top[k] * qtmp;
2247 v2gridn[kk][jj][ii] += v2_direct_top[k] * qtmp;
2248 v3gridn[kk][jj][ii] += v3_direct_top[k] * qtmp;
2249 v4gridn[kk][jj][ii] += v4_direct_top[k] * qtmp;
2250 v5gridn[kk][jj][ii] += v5_direct_top[k] * qtmp;
2251 }
2252
2253 // virial is zero for iz=0, iy=0, ix=0
2254
2255 }
2256 }
2257 }
2258 }
2259
2260 /* ----------------------------------------------------------------------
2261 MSM restriction procedure for intermediate grid levels, interpolate
2262 charges from finer grid to coarser grid
2263 ------------------------------------------------------------------------- */
2264
restriction(int n)2265 void MSM::restriction(int n)
2266 {
2267 const int p = order-1;
2268
2269 double ***qgrid1 = qgrid[n];
2270 double ***qgrid2 = qgrid[n+1];
2271
2272 int k = 0;
2273 int *index = new int[p+2];
2274 for (int nu=-p; nu<=p; nu++) {
2275 if (nu%2 == 0 && nu != 0) continue;
2276 phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
2277 phi1d[1][k] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
2278 phi1d[2][k] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
2279 index[k] = nu;
2280 k++;
2281 }
2282
2283 int ip,jp,kp,ic,jc,kc,i,j;
2284 int ii,jj,kk;
2285 double phiz,phizy,q2sum;
2286
2287 // zero out charge on coarser grid
2288
2289 memset(&(qgrid2[nzlo_out[n+1]][nylo_out[n+1]][nxlo_out[n+1]]),0,ngrid[n+1]*sizeof(double));
2290
2291 for (kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
2292 for (jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
2293 for (ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {
2294
2295 ic = ip * static_cast<int> (delxinv[n]/delxinv[n+1]);
2296 jc = jp * static_cast<int> (delyinv[n]/delyinv[n+1]);
2297 kc = kp * static_cast<int> (delzinv[n]/delzinv[n+1]);
2298
2299 q2sum = 0.0;
2300
2301 for (k=0; k<=p+1; k++) {
2302 kk = kc+index[k];
2303 if (!domain->zperiodic) {
2304 if (kk < alpha[n]) continue;
2305 if (kk > betaz[n]) break;
2306 }
2307 phiz = phi1d[2][k];
2308 for (j=0; j<=p+1; j++) {
2309 jj = jc+index[j];
2310 if (!domain->yperiodic) {
2311 if (jj < alpha[n]) continue;
2312 if (jj > betay[n]) break;
2313 }
2314 phizy = phi1d[1][j]*phiz;
2315 for (i=0; i<=p+1; i++) {
2316 ii = ic+index[i];
2317 if (!domain->xperiodic) {
2318 if (ii < alpha[n]) continue;
2319 if (ii > betax[n]) break;
2320 }
2321 q2sum += qgrid1[kk][jj][ii] *
2322 phi1d[0][i]*phizy;
2323 }
2324 }
2325 }
2326 qgrid2[kp][jp][ip] += q2sum;
2327 }
2328 delete[] index;
2329 }
2330
2331 /* ----------------------------------------------------------------------
2332 MSM prolongation procedure for intermediate grid levels, interpolate
2333 per-atom energy/virial from coarser grid to finer grid
2334 ------------------------------------------------------------------------- */
2335
prolongation(int n)2336 void MSM::prolongation(int n)
2337 {
2338 const int p = order-1;
2339
2340 double ***egrid1 = egrid[n];
2341 double ***egrid2 = egrid[n+1];
2342
2343 double ***v0grid1 = v0grid[n];
2344 double ***v0grid2 = v0grid[n+1];
2345 double ***v1grid1 = v1grid[n];
2346 double ***v1grid2 = v1grid[n+1];
2347 double ***v2grid1 = v2grid[n];
2348 double ***v2grid2 = v2grid[n+1];
2349 double ***v3grid1 = v3grid[n];
2350 double ***v3grid2 = v3grid[n+1];
2351 double ***v4grid1 = v4grid[n];
2352 double ***v4grid2 = v4grid[n+1];
2353 double ***v5grid1 = v5grid[n];
2354 double ***v5grid2 = v5grid[n+1];
2355
2356 int k = 0;
2357 int *index = new int[p+2];
2358 for (int nu=-p; nu<=p; nu++) {
2359 if (nu%2 == 0 && nu != 0) continue;
2360 phi1d[0][k] = compute_phi(nu*delxinv[n+1]/delxinv[n]);
2361 phi1d[1][k] = compute_phi(nu*delyinv[n+1]/delyinv[n]);
2362 phi1d[2][k] = compute_phi(nu*delzinv[n+1]/delzinv[n]);
2363 index[k] = nu;
2364 k++;
2365 }
2366
2367 int ip,jp,kp,ic,jc,kc,i,j;
2368 int ii,jj,kk;
2369 double phiz,phizy,phi3d;
2370 double etmp2,v0tmp2,v1tmp2,v2tmp2,v3tmp2,v4tmp2,v5tmp2;
2371
2372 for (kp = nzlo_in[n+1]; kp <= nzhi_in[n+1]; kp++)
2373 for (jp = nylo_in[n+1]; jp <= nyhi_in[n+1]; jp++)
2374 for (ip = nxlo_in[n+1]; ip <= nxhi_in[n+1]; ip++) {
2375
2376 ic = ip * static_cast<int> (delxinv[n]/delxinv[n+1]);
2377 jc = jp * static_cast<int> (delyinv[n]/delyinv[n+1]);
2378 kc = kp * static_cast<int> (delzinv[n]/delzinv[n+1]);
2379
2380 etmp2 = egrid2[kp][jp][ip];
2381
2382 if (vflag_atom) {
2383 v0tmp2 = v0grid2[kp][jp][ip];
2384 v1tmp2 = v1grid2[kp][jp][ip];
2385 v2tmp2 = v2grid2[kp][jp][ip];
2386 v3tmp2 = v3grid2[kp][jp][ip];
2387 v4tmp2 = v4grid2[kp][jp][ip];
2388 v5tmp2 = v5grid2[kp][jp][ip];
2389 }
2390
2391 for (k=0; k<=p+1; k++) {
2392 kk = kc+index[k];
2393 if (!domain->zperiodic) {
2394 if (kk < alpha[n]) continue;
2395 if (kk > betaz[n]) break;
2396 }
2397 phiz = phi1d[2][k];
2398 for (j=0; j<=p+1; j++) {
2399 jj = jc+index[j];
2400 if (!domain->yperiodic) {
2401 if (jj < alpha[n]) continue;
2402 if (jj > betay[n]) break;
2403 }
2404 phizy = phi1d[1][j]*phiz;
2405 for (i=0; i<=p+1; i++) {
2406 ii = ic+index[i];
2407 if (!domain->xperiodic) {
2408 if (ii < alpha[n]) continue;
2409 if (ii > betax[n]) break;
2410 }
2411 phi3d = phi1d[0][i]*phizy;
2412
2413 egrid1[kk][jj][ii] += etmp2 * phi3d;
2414
2415 if (vflag_atom) {
2416 v0grid1[kk][jj][ii] += v0tmp2 * phi3d;
2417 v1grid1[kk][jj][ii] += v1tmp2 * phi3d;
2418 v2grid1[kk][jj][ii] += v2tmp2 * phi3d;
2419 v3grid1[kk][jj][ii] += v3tmp2 * phi3d;
2420 v4grid1[kk][jj][ii] += v4tmp2 * phi3d;
2421 v5grid1[kk][jj][ii] += v5tmp2 * phi3d;
2422 }
2423
2424 }
2425 }
2426 }
2427
2428 }
2429 delete[] index;
2430 }
2431
2432 /* ----------------------------------------------------------------------
2433 Use MPI_Allreduce to fill ghost grid values, for coarse grids this may
2434 be cheaper than using nearest-neighbor communication (commgrid), right
2435 now only works for periodic boundary conditions
2436 ------------------------------------------------------------------------- */
2437
grid_swap_forward(int n,double *** & gridn)2438 void MSM::grid_swap_forward(int n, double*** &gridn)
2439 {
2440 double ***gridn_tmp;
2441 memory->create(gridn_tmp,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_tmp");
2442
2443 double ***gridn_all;
2444 memory->create(gridn_all,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_all");
2445
2446 int ngrid_in = nx_msm[n] * ny_msm[n] * nz_msm[n];
2447
2448 memset(&(gridn_tmp[0][0][0]),0,ngrid_in*sizeof(double));
2449 memset(&(gridn_all[0][0][0]),0,ngrid_in*sizeof(double));
2450
2451 // copy inner grid cell values from gridn into gridn_tmp
2452
2453 int icx,icy,icz;
2454
2455 for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++)
2456 for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++)
2457 for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++)
2458 gridn_tmp[icz][icy][icx] = gridn[icz][icy][icx];
2459
2460 MPI_Allreduce(&(gridn_tmp[0][0][0]),
2461 &(gridn_all[0][0][0]),
2462 ngrid_in,MPI_DOUBLE,MPI_SUM,world_levels[n]);
2463
2464 // bitmask for PBCs (only works for power of 2 numbers)
2465
2466 int PBCx,PBCy,PBCz;
2467
2468 PBCx = nx_msm[n]-1;
2469 PBCy = ny_msm[n]-1;
2470 PBCz = nz_msm[n]-1;
2471
2472 // copy from gridn_all into gridn to fill ghost grid cell values
2473
2474 for (icz = nzlo_out[n]; icz <= nzhi_out[n]; icz++)
2475 for (icy = nylo_out[n]; icy <= nyhi_out[n]; icy++)
2476 for (icx = nxlo_out[n]; icx <= nxhi_out[n]; icx++)
2477 gridn[icz][icy][icx] = gridn_all[icz&PBCz][icy&PBCy][icx&PBCx];
2478
2479 memory->destroy(gridn_tmp);
2480 memory->destroy(gridn_all);
2481 }
2482
2483 /* ----------------------------------------------------------------------
2484 Use MPI_Allreduce to get contribution from ghost grid cells, for coarse
2485 grids this may be cheaper than using nearest-neighbor communication
2486 (commgrid), right now only works for periodic boundary conditions
2487 ------------------------------------------------------------------------- */
2488
grid_swap_reverse(int n,double *** & gridn)2489 void MSM::grid_swap_reverse(int n, double*** &gridn)
2490 {
2491 double ***gridn_tmp;
2492 memory->create(gridn_tmp,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_tmp");
2493
2494 double ***gridn_all;
2495 memory->create(gridn_all,nz_msm[n],ny_msm[n],nx_msm[n],"msm:grid_all");
2496
2497 int ngrid_in = nx_msm[n] * ny_msm[n] * nz_msm[n];
2498
2499 memset(&(gridn_tmp[0][0][0]),0,ngrid_in*sizeof(double));
2500 memset(&(gridn_all[0][0][0]),0,ngrid_in*sizeof(double));
2501
2502 // bitmask for PBCs (only works for power of 2 numbers)
2503
2504 int icx,icy,icz;
2505 int PBCx,PBCy,PBCz;
2506
2507 PBCx = nx_msm[n]-1;
2508 PBCy = ny_msm[n]-1;
2509 PBCz = nz_msm[n]-1;
2510
2511 // copy ghost grid cell values from gridn into inner portion of gridn_tmp
2512
2513 for (icz = nzlo_out[n]; icz <= nzhi_out[n]; icz++)
2514 for (icy = nylo_out[n]; icy <= nyhi_out[n]; icy++)
2515 for (icx = nxlo_out[n]; icx <= nxhi_out[n]; icx++)
2516 gridn_tmp[icz&PBCz][icy&PBCy][icx&PBCx] += gridn[icz][icy][icx];
2517
2518 MPI_Allreduce(&(gridn_tmp[0][0][0]),
2519 &(gridn_all[0][0][0]),
2520 ngrid_in,MPI_DOUBLE,MPI_SUM,world_levels[n]);
2521
2522 // copy inner grid cell values from gridn_all into gridn
2523
2524 for (icz = nzlo_in[n]; icz <= nzhi_in[n]; icz++)
2525 for (icy = nylo_in[n]; icy <= nyhi_in[n]; icy++)
2526 for (icx = nxlo_in[n]; icx <= nxhi_in[n]; icx++)
2527 gridn[icz][icy][icx] = gridn_all[icz][icy][icx];
2528
2529 memory->destroy(gridn_tmp);
2530 memory->destroy(gridn_all);
2531 }
2532
2533 /* ----------------------------------------------------------------------
2534 pack own values to buf to send to another proc (used by commgrid)
2535 ------------------------------------------------------------------------- */
2536
pack_forward_grid(int flag,void * vbuf,int nlist,int * list)2537 void MSM::pack_forward_grid(int flag, void *vbuf, int nlist, int *list)
2538 {
2539 double *buf = (double *) vbuf;
2540
2541 int n = current_level;
2542 int k = 0;
2543
2544 if (flag == FORWARD_RHO) {
2545 double ***qgridn = qgrid[n];
2546 double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2547 for (int i = 0; i < nlist; i++) {
2548 buf[k++] = qsrc[list[i]];
2549 }
2550 } else if (flag == FORWARD_AD) {
2551 double ***egridn = egrid[n];
2552 double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2553 for (int i = 0; i < nlist; i++)
2554 buf[i] = src[list[i]];
2555 } else if (flag == FORWARD_AD_PERATOM) {
2556 double ***v0gridn = v0grid[n];
2557 double ***v1gridn = v1grid[n];
2558 double ***v2gridn = v2grid[n];
2559 double ***v3gridn = v3grid[n];
2560 double ***v4gridn = v4grid[n];
2561 double ***v5gridn = v5grid[n];
2562 double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2563 double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2564 double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2565 double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2566 double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2567 double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2568 for (int i = 0; i < nlist; i++) {
2569 buf[k++] = v0src[list[i]];
2570 buf[k++] = v1src[list[i]];
2571 buf[k++] = v2src[list[i]];
2572 buf[k++] = v3src[list[i]];
2573 buf[k++] = v4src[list[i]];
2574 buf[k++] = v5src[list[i]];
2575 }
2576 }
2577 }
2578
2579 /* ----------------------------------------------------------------------
2580 unpack another proc's own values from buf and set own ghost values
2581 ------------------------------------------------------------------------- */
2582
unpack_forward_grid(int flag,void * vbuf,int nlist,int * list)2583 void MSM::unpack_forward_grid(int flag, void *vbuf, int nlist, int *list)
2584 {
2585 double *buf = (double *) vbuf;
2586
2587 int n = current_level;
2588 int k = 0;
2589
2590 if (flag == FORWARD_RHO) {
2591 double ***qgridn = qgrid[n];
2592 double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2593 for (int i = 0; i < nlist; i++) {
2594 dest[list[i]] = buf[k++];
2595 }
2596 } else if (flag == FORWARD_AD) {
2597 double ***egridn = egrid[n];
2598 double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2599 for (int i = 0; i < nlist; i++)
2600 dest[list[i]] = buf[k++];
2601 } else if (flag == FORWARD_AD_PERATOM) {
2602 double ***v0gridn = v0grid[n];
2603 double ***v1gridn = v1grid[n];
2604 double ***v2gridn = v2grid[n];
2605 double ***v3gridn = v3grid[n];
2606 double ***v4gridn = v4grid[n];
2607 double ***v5gridn = v5grid[n];
2608 double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2609 double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2610 double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2611 double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2612 double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2613 double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2614 for (int i = 0; i < nlist; i++) {
2615 v0src[list[i]] = buf[k++];
2616 v1src[list[i]] = buf[k++];
2617 v2src[list[i]] = buf[k++];
2618 v3src[list[i]] = buf[k++];
2619 v4src[list[i]] = buf[k++];
2620 v5src[list[i]] = buf[k++];
2621 }
2622 }
2623 }
2624
2625 /* ----------------------------------------------------------------------
2626 pack ghost values into buf to send to another proc
2627 ------------------------------------------------------------------------- */
2628
pack_reverse_grid(int flag,void * vbuf,int nlist,int * list)2629 void MSM::pack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
2630 {
2631 double *buf = (double *) vbuf;
2632
2633 int n = current_level;
2634 int k = 0;
2635
2636 if (flag == REVERSE_RHO) {
2637 double ***qgridn = qgrid[n];
2638 double *qsrc = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2639 for (int i = 0; i < nlist; i++) {
2640 buf[k++] = qsrc[list[i]];
2641 }
2642 } else if (flag == REVERSE_AD) {
2643 double ***egridn = egrid[n];
2644 double *src = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2645 for (int i = 0; i < nlist; i++)
2646 buf[i] = src[list[i]];
2647 } else if (flag == REVERSE_AD_PERATOM) {
2648 double ***v0gridn = v0grid[n];
2649 double ***v1gridn = v1grid[n];
2650 double ***v2gridn = v2grid[n];
2651 double ***v3gridn = v3grid[n];
2652 double ***v4gridn = v4grid[n];
2653 double ***v5gridn = v5grid[n];
2654 double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2655 double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2656 double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2657 double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2658 double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2659 double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2660 for (int i = 0; i < nlist; i++) {
2661 buf[k++] = v0src[list[i]];
2662 buf[k++] = v1src[list[i]];
2663 buf[k++] = v2src[list[i]];
2664 buf[k++] = v3src[list[i]];
2665 buf[k++] = v4src[list[i]];
2666 buf[k++] = v5src[list[i]];
2667 }
2668 }
2669 }
2670
2671 /* ----------------------------------------------------------------------
2672 unpack another proc's ghost values from buf and add to own values
2673 ------------------------------------------------------------------------- */
2674
unpack_reverse_grid(int flag,void * vbuf,int nlist,int * list)2675 void MSM::unpack_reverse_grid(int flag, void *vbuf, int nlist, int *list)
2676 {
2677 double *buf = (double *) vbuf;
2678
2679 int n = current_level;
2680 int k = 0;
2681
2682 if (flag == REVERSE_RHO) {
2683 double ***qgridn = qgrid[n];
2684 double *dest = &qgridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2685 for (int i = 0; i < nlist; i++) {
2686 dest[list[i]] += buf[k++];
2687 }
2688 } else if (flag == REVERSE_AD) {
2689 double ***egridn = egrid[n];
2690 double *dest = &egridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2691 for (int i = 0; i < nlist; i++)
2692 dest[list[i]] += buf[k++];
2693 } else if (flag == REVERSE_AD_PERATOM) {
2694 double ***v0gridn = v0grid[n];
2695 double ***v1gridn = v1grid[n];
2696 double ***v2gridn = v2grid[n];
2697 double ***v3gridn = v3grid[n];
2698 double ***v4gridn = v4grid[n];
2699 double ***v5gridn = v5grid[n];
2700 double *v0src = &v0gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2701 double *v1src = &v1gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2702 double *v2src = &v2gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2703 double *v3src = &v3gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2704 double *v4src = &v4gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2705 double *v5src = &v5gridn[nzlo_out[n]][nylo_out[n]][nxlo_out[n]];
2706 for (int i = 0; i < nlist; i++) {
2707 v0src[list[i]] += buf[k++];
2708 v1src[list[i]] += buf[k++];
2709 v2src[list[i]] += buf[k++];
2710 v3src[list[i]] += buf[k++];
2711 v4src[list[i]] += buf[k++];
2712 v5src[list[i]] += buf[k++];
2713 }
2714 }
2715 }
2716
2717 /* ----------------------------------------------------------------------
2718 interpolate from grid to get force on my particles
2719 ------------------------------------------------------------------------- */
2720
fieldforce()2721 void MSM::fieldforce()
2722 {
2723 double ***egridn = egrid[0];
2724
2725 int i,l,m,n,nx,ny,nz,mx,my,mz;
2726 double dx,dy,dz;
2727 double phi_x,phi_y,phi_z;
2728 double dphi_x,dphi_y,dphi_z;
2729 double ekx,eky,ekz,etmp;
2730
2731
2732 // loop over my charges, interpolate electric field from nearby grid points
2733 // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
2734 // (dx,dy,dz) = distance to "lower left" grid pt
2735 // (mx,my,mz) = global coords of moving stencil pt
2736 // ek = 3 components of E-field on particle
2737
2738 double *q = atom->q;
2739 double **x = atom->x;
2740 double **f = atom->f;
2741
2742 int nlocal = atom->nlocal;
2743
2744 for (i = 0; i < nlocal; i++) {
2745 nx = part2grid[i][0];
2746 ny = part2grid[i][1];
2747 nz = part2grid[i][2];
2748 dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
2749 dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
2750 dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
2751
2752 compute_phis_and_dphis(dx,dy,dz);
2753
2754 ekx = eky = ekz = 0.0;
2755 for (n = nlower; n <= nupper; n++) {
2756 mz = n+nz;
2757 phi_z = phi1d[2][n];
2758 dphi_z = dphi1d[2][n];
2759 for (m = nlower; m <= nupper; m++) {
2760 my = m+ny;
2761 phi_y = phi1d[1][m];
2762 dphi_y = dphi1d[1][m];
2763 for (l = nlower; l <= nupper; l++) {
2764 mx = l+nx;
2765 phi_x = phi1d[0][l];
2766 dphi_x = dphi1d[0][l];
2767 etmp = egridn[mz][my][mx];
2768 ekx += dphi_x*phi_y*phi_z*etmp;
2769 eky += phi_x*dphi_y*phi_z*etmp;
2770 ekz += phi_x*phi_y*dphi_z*etmp;
2771 }
2772 }
2773 }
2774
2775 ekx *= delxinv[0];
2776 eky *= delyinv[0];
2777 ekz *= delzinv[0];
2778
2779 // effectively divide by length for a triclinic system
2780
2781 if (triclinic) {
2782 double tmp[3];
2783 tmp[0] = ekx;
2784 tmp[1] = eky;
2785 tmp[2] = ekz;
2786 x2lamdaT(&tmp[0],&tmp[0]);
2787 ekx = tmp[0];
2788 eky = tmp[1];
2789 ekz = tmp[2];
2790 }
2791
2792 // convert E-field to force
2793
2794 const double qfactor = qqrd2e*scale*q[i];
2795 f[i][0] += qfactor*ekx;
2796 f[i][1] += qfactor*eky;
2797 f[i][2] += qfactor*ekz;
2798 }
2799 }
2800
2801 /* ----------------------------------------------------------------------
2802 interpolate from grid to get per-atom energy/virial
2803 ------------------------------------------------------------------------- */
2804
fieldforce_peratom()2805 void MSM::fieldforce_peratom()
2806 {
2807 int i,l,m,n,nx,ny,nz,mx,my,mz;
2808 double dx,dy,dz,x0,y0,z0;
2809 double u,v0,v1,v2,v3,v4,v5;
2810
2811 double ***egridn = egrid[0];
2812
2813 double ***v0gridn = v0grid[0];
2814 double ***v1gridn = v1grid[0];
2815 double ***v2gridn = v2grid[0];
2816 double ***v3gridn = v3grid[0];
2817 double ***v4gridn = v4grid[0];
2818 double ***v5gridn = v5grid[0];
2819
2820 // loop over my charges, interpolate from nearby grid points
2821 // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
2822 // (dx,dy,dz) = distance to "lower left" grid pt
2823 // (mx,my,mz) = global coords of moving stencil pt
2824
2825 double *q = atom->q;
2826 double **x = atom->x;
2827
2828 int nlocal = atom->nlocal;
2829
2830 for (i = 0; i < nlocal; i++) {
2831 nx = part2grid[i][0];
2832 ny = part2grid[i][1];
2833 nz = part2grid[i][2];
2834 dx = nx - (x[i][0]-boxlo[0])*delxinv[0];
2835 dy = ny - (x[i][1]-boxlo[1])*delyinv[0];
2836 dz = nz - (x[i][2]-boxlo[2])*delzinv[0];
2837
2838 compute_phis_and_dphis(dx,dy,dz);
2839
2840 u = v0 = v1 = v2 = v3 = v4 = v5 = 0.0;
2841 for (n = nlower; n <= nupper; n++) {
2842 mz = n+nz;
2843 z0 = phi1d[2][n];
2844 for (m = nlower; m <= nupper; m++) {
2845 my = m+ny;
2846 y0 = z0*phi1d[1][m];
2847 for (l = nlower; l <= nupper; l++) {
2848 mx = l+nx;
2849 x0 = y0*phi1d[0][l];
2850 if (eflag_atom) u += x0*egridn[mz][my][mx];
2851 if (vflag_atom) {
2852 v0 += x0*v0gridn[mz][my][mx];
2853 v1 += x0*v1gridn[mz][my][mx];
2854 v2 += x0*v2gridn[mz][my][mx];
2855 v3 += x0*v3gridn[mz][my][mx];
2856 v4 += x0*v4gridn[mz][my][mx];
2857 v5 += x0*v5gridn[mz][my][mx];
2858 }
2859 }
2860 }
2861 }
2862
2863 if (eflag_atom) eatom[i] += q[i]*u;
2864 if (vflag_atom) {
2865 vatom[i][0] += q[i]*v0;
2866 vatom[i][1] += q[i]*v1;
2867 vatom[i][2] += q[i]*v2;
2868 vatom[i][3] += q[i]*v3;
2869 vatom[i][4] += q[i]*v4;
2870 vatom[i][5] += q[i]*v5;
2871 }
2872 }
2873 }
2874
2875 /* ----------------------------------------------------------------------
2876 charge assignment into phi1d (interpolation coefficients)
2877 ------------------------------------------------------------------------- */
2878
compute_phis(const double & dx,const double & dy,const double & dz)2879 void MSM::compute_phis(const double &dx, const double &dy,
2880 const double &dz)
2881 {
2882 double delx,dely,delz;
2883
2884 for (int nu = nlower; nu <= nupper; nu++) {
2885 delx = dx + double(nu);
2886 dely = dy + double(nu);
2887 delz = dz + double(nu);
2888
2889 phi1d[0][nu] = compute_phi(delx);
2890 phi1d[1][nu] = compute_phi(dely);
2891 phi1d[2][nu] = compute_phi(delz);
2892 }
2893 }
2894
2895 /* ----------------------------------------------------------------------
2896 charge assignment into phi1d and dphi1d (interpolation coefficients)
2897 ------------------------------------------------------------------------- */
2898
compute_phis_and_dphis(const double & dx,const double & dy,const double & dz)2899 void MSM::compute_phis_and_dphis(const double &dx, const double &dy,
2900 const double &dz)
2901 {
2902 double delx,dely,delz;
2903
2904 for (int nu = nlower; nu <= nupper; nu++) {
2905 delx = dx + double(nu);
2906 dely = dy + double(nu);
2907 delz = dz + double(nu);
2908
2909 phi1d[0][nu] = compute_phi(delx);
2910 phi1d[1][nu] = compute_phi(dely);
2911 phi1d[2][nu] = compute_phi(delz);
2912 dphi1d[0][nu] = compute_dphi(delx);
2913 dphi1d[1][nu] = compute_dphi(dely);
2914 dphi1d[2][nu] = compute_dphi(delz);
2915 }
2916 }
2917
2918 /* ----------------------------------------------------------------------
2919 compute phi using interpolating polynomial
2920 see Eq 7 from Parallel Computing 35 (2009) 164-177
2921 and Hardy's thesis
2922 ------------------------------------------------------------------------- */
2923
compute_phi(const double & xi)2924 inline double MSM::compute_phi(const double &xi)
2925 {
2926 double phi = 0.0;
2927 double abs_xi = fabs(xi);
2928 double xi2 = xi*xi;
2929
2930 if (order == 4) {
2931 if (abs_xi <= 1) {
2932 phi = (1.0 - abs_xi)*(1.0 + abs_xi - 1.5*xi2);
2933 } else if (abs_xi <= 2) {
2934 phi = -0.5*(abs_xi - 1.0)*(2.0 - abs_xi)*(2.0 - abs_xi);
2935 } else {
2936 phi = 0.0;
2937 }
2938
2939 } else if (order == 6) {
2940 if (abs_xi <= 1) {
2941 phi = (1.0 - xi2)*(2.0 - abs_xi)*(6.0 + 3.0*abs_xi -
2942 5.0*xi2)/12.0;
2943 } else if (abs_xi <= 2) {
2944 phi = -(abs_xi - 1.0)*(2.0 - abs_xi)*(3.0 - abs_xi)*
2945 (4.0 + 9.0*abs_xi - 5.0*xi2)/24.0;
2946 } else if (abs_xi <= 3) {
2947 phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*
2948 (3.0 - abs_xi)*(4.0 - abs_xi)/24.0;
2949 } else {
2950 phi = 0.0;
2951 }
2952
2953 } else if (order == 8) {
2954 if (abs_xi <= 1) {
2955 phi = (1.0 - xi2)*(4.0 - xi2)*(3.0 - abs_xi)*
2956 (12.0 + 4.0*abs_xi - 7.0*xi2)/144.0;
2957 } else if (abs_xi <= 2) {
2958 phi = -(xi2 - 1.0)*(2.0 - abs_xi)*(3.0 - abs_xi)*
2959 (4.0 - abs_xi)*(10.0 + 12.0*abs_xi - 7.0*xi2)/240.0;
2960 } else if (abs_xi <= 3) {
2961 phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*(4.0 - abs_xi)*
2962 (5.0 - abs_xi)*(6.0 + 20.0*abs_xi - 7.0*xi2)/720.0;
2963 } else if (abs_xi <= 4) {
2964 phi = -(abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*(4.0 - abs_xi)*
2965 (4.0 - abs_xi)*(5.0 - abs_xi)*(6.0 - abs_xi)/720.0;
2966 } else {
2967 phi = 0.0;
2968 }
2969
2970 } else if (order == 10) {
2971 if (abs_xi <= 1) {
2972 phi = (1.0 - xi2)*(4.0 - xi2)*(9.0 - xi2)*
2973 (4.0 - abs_xi)*(20.0 + 5.0*abs_xi - 9.0*xi2)/2880.0;
2974 } else if (abs_xi <= 2) {
2975 phi = -(xi2 - 1.0)*(4.0 - xi2)*(3.0 - abs_xi)*(4.0 - abs_xi)*
2976 (5.0 - abs_xi)*(6.0 + 5.0*abs_xi - 3.0*xi2)/1440.0;
2977 } else if (abs_xi <= 3) {
2978 phi = (xi2 - 1.0)*(abs_xi - 2.0)*(3.0 - abs_xi)*(4.0 - abs_xi)*
2979 (5.0 - abs_xi)*(6.0 - abs_xi)*(14.0 + 25.0*abs_xi - 9.0*xi2)/10080.0;
2980 } else if (abs_xi <= 4) {
2981 phi = -(abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*(4.0 - abs_xi)*
2982 (5.0 - abs_xi)*(6.0 - abs_xi)*(7.0 - abs_xi)*
2983 (8.0 + 35.0*abs_xi - 9.0*xi2)/40320.0;
2984 } else if (abs_xi <= 5) {
2985 phi = (abs_xi - 1.0)*(abs_xi - 2.0)*(abs_xi - 3.0)*
2986 (abs_xi - 4.0)*(5.0 - abs_xi)*(5.0 - abs_xi)*(6.0 - abs_xi)*
2987 (7.0 - abs_xi)*(8.0 - abs_xi)/40320.0;
2988 } else {
2989 phi = 0.0;
2990 }
2991 }
2992
2993 return phi;
2994 }
2995
2996 /* ----------------------------------------------------------------------
2997 compute the derivative of phi
2998 phi is an interpolating polynomial
2999 see Eq 7 from Parallel Computing 35 (2009) 164-177
3000 and Hardy's thesis
3001 ------------------------------------------------------------------------- */
3002
compute_dphi(const double & xi)3003 inline double MSM::compute_dphi(const double &xi)
3004 {
3005 double dphi = 0.0;
3006 double abs_xi = fabs(xi);
3007
3008 if (order == 4) {
3009 double xi2 = xi*xi;
3010 double abs_xi2 = abs_xi*abs_xi;
3011 if (abs_xi == 0.0) {
3012 dphi = 0.0;
3013 } else if (abs_xi <= 1) {
3014 dphi = xi*(3*xi2 + 6*abs_xi2 - 10*abs_xi)/2.0/abs_xi;
3015 } else if (abs_xi <= 2) {
3016 dphi = xi*(2 - abs_xi)*(3*abs_xi - 4)/2.0/abs_xi;
3017 } else {
3018 dphi = 0.0;
3019 }
3020
3021 } else if (order == 6) {
3022 double xi2 = xi*xi;
3023 double xi4 = xi2*xi2;
3024 double abs_xi2 = abs_xi*abs_xi;
3025 double abs_xi3 = abs_xi2*abs_xi;
3026 double abs_xi4 = abs_xi2*abs_xi2;
3027 if (abs_xi == 0.0) {
3028 dphi = 0.0;
3029 } else if (abs_xi <= 1) {
3030 dphi = xi*(46*xi2*abs_xi - 20*xi2*abs_xi2 - 5*xi4 + 5*xi2 +
3031 6*abs_xi3 + 10*abs_xi2 - 50*abs_xi)/12.0/abs_xi;
3032 } else if (abs_xi <= 2) {
3033 dphi = xi*(15*xi2*abs_xi2 - 60*xi2*abs_xi + 55*xi2 +
3034 10*abs_xi4 - 96*abs_xi3 + 260*abs_xi2 - 210*abs_xi + 10)/
3035 24.0/abs_xi;
3036 } else if (abs_xi <= 3) {
3037 dphi = -xi*(abs_xi - 3)*(5*abs_xi3 - 37*abs_xi2 +
3038 84*abs_xi - 58)/24.0/abs_xi;
3039 } else {
3040 dphi = 0.0;
3041 }
3042
3043 } else if (order == 8) {
3044 double xi2 = xi*xi;
3045 double xi4 = xi2*xi2;
3046 double xi6 = xi4*xi2;
3047 double abs_xi3 = xi2*abs_xi;
3048 double abs_xi5 = xi4*abs_xi;
3049 if (abs_xi == 0.0) {
3050 dphi = 0.0;
3051 } else if (abs_xi <= 1) {
3052 dphi = xi*(49*xi6 - 175*xi4 + 84*xi2 - 150*abs_xi5 +
3053 644*abs_xi3 - 560*abs_xi)/144.0/abs_xi;
3054 } else if (abs_xi <= 2) {
3055 dphi = xi*(-49*xi6 - 1365*xi4 + 756*xi2 +
3056 450*abs_xi5 + 1260*abs_xi3 - 1260*abs_xi + 28)/240.0/abs_xi;
3057 } else if (abs_xi <= 3) {
3058 dphi = xi*(49*xi6 + 4445*xi4 + 17724*xi2 -
3059 750*abs_xi5 - 12740*abs_xi3 - 9940*abs_xi + 756)/720.0/abs_xi;
3060 } else if (abs_xi <= 4) {
3061 dphi = -xi*(abs_xi - 4)*(7*abs_xi5 - 122*xi4 +
3062 807*abs_xi3 - 2512*xi2 + 3644*abs_xi - 1944)/720.0/abs_xi;
3063 } else {
3064 dphi = 0.0;
3065 }
3066
3067 } else if (order == 10) {
3068 double xi2 = xi*xi;
3069 double xi4 = xi2*xi2;
3070 double xi6 = xi4*xi2;
3071 double xi8 = xi6*xi2;
3072 double abs_xi2 = abs_xi*abs_xi;
3073 double abs_xi3 = abs_xi2*abs_xi;
3074 double abs_xi4 = abs_xi2*abs_xi2;
3075 double abs_xi5 = abs_xi4*abs_xi;
3076 double abs_xi6 = abs_xi5*abs_xi;
3077 double abs_xi7 = abs_xi6*abs_xi;
3078 double abs_xi8 = abs_xi7*abs_xi;
3079 if (abs_xi == 0.0) {
3080 dphi = 0.0;
3081 } else if (abs_xi <= 1) {
3082 dphi = xi*(298*xi6*abs_xi - 72*xi6*abs_xi2 - 9*xi8 +
3083 126*xi6 + 30*xi4*abs_xi3 + 756*xi4*abs_xi2 - 3644*xi4*abs_xi -
3084 441*xi4 - 280*xi2*abs_xi3 - 1764*xi2*abs_xi2 + 12026*xi2*abs_xi +
3085 324*xi2 + 490*abs_xi3 + 648*abs_xi2 - 10792*abs_xi)/2880.0/abs_xi;
3086 } else if (abs_xi <= 2) {
3087 dphi = xi*(9*xi6*abs_xi2 - 72*xi6*abs_xi + 141*xi6 +
3088 18*xi4*abs_xi4 - 236*xi4*abs_xi3 + 963*xi4*abs_xi2 -
3089 1046*xi4*abs_xi - 687*xi4 - 20*xi2*abs_xi5 + 156*xi2*abs_xi4 +
3090 168*xi2*abs_xi3 - 3522*xi2*abs_xi2 + 6382*xi2*abs_xi + 474*xi2 +
3091 50*abs_xi5 - 516*abs_xi4 + 1262*abs_xi3 + 1596*abs_xi2 -
3092 6344*abs_xi + 72)/1440.0/abs_xi;
3093 } else if (abs_xi <= 3) {
3094 dphi = xi*(720*xi4*abs_xi3 - 45*xi4*abs_xi4 - 4185*xi4*abs_xi2 +
3095 10440*xi4*abs_xi - 9396*xi4 - 36*xi2*abs_xi6 + 870*xi2*abs_xi5 -
3096 7965*xi2*abs_xi4 + 34540*xi2*abs_xi3 - 70389*xi2*abs_xi2 +
3097 51440*xi2*abs_xi + 6012*xi2 + 50*abs_xi7 - 954*abs_xi6 +
3098 6680*abs_xi5 - 19440*abs_xi4 + 11140*abs_xi3 + 49014*abs_xi2 -
3099 69080*abs_xi + 3384)/10080.0/abs_xi;
3100 } else if (abs_xi <= 4) {
3101 dphi = xi*(63*xi2*abs_xi6 - 1512*xi2*abs_xi5 + 14490*xi2*abs_xi4 -
3102 70560*xi2*abs_xi3 + 182763*xi2*abs_xi2 - 236376*xi2*abs_xi +
3103 117612*xi2 + 18*abs_xi8 - 784*abs_xi7 + 12600*abs_xi6 -
3104 101556*abs_xi5 + 451962*abs_xi4 - 1121316*abs_xi3 +
3105 1451628*abs_xi2 - 795368*abs_xi + 71856)/40320.0/abs_xi;
3106 } else if (abs_xi <= 5) {
3107 dphi = -xi*(abs_xi - 5)*(9*abs_xi7 - 283*abs_xi6 +
3108 3667*abs_xi5 - 25261*abs_xi4 + 99340*abs_xi3 -
3109 221416*abs_xi2 + 256552*abs_xi - 117648)/40320.0/abs_xi;
3110 } else {
3111 dphi = 0.0;
3112 }
3113 }
3114
3115 return dphi;
3116 }
3117
3118 /* ----------------------------------------------------------------------
3119 Compute direct interaction (energy) weights for intermediate grid levels
3120 ------------------------------------------------------------------------- */
get_g_direct()3121 void MSM::get_g_direct()
3122 {
3123 if (g_direct) memory->destroy(g_direct);
3124 memory->create(g_direct,levels,nmax_direct,"msm:g_direct");
3125
3126 double a = cutoff;
3127
3128 int n,zk,zyk,k,ix,iy,iz;
3129 double xdiff,ydiff,zdiff;
3130 double dx,dy,dz;
3131 double tmp[3];
3132 double rsq,rho,two_n;
3133
3134 two_n = 1.0;
3135
3136 int nx = nxhi_direct - nxlo_direct + 1;
3137 int ny = nyhi_direct - nylo_direct + 1;
3138
3139 for (n=0; n<levels; n++) {
3140
3141 for (iz = nzlo_direct; iz <= nzhi_direct; iz++) {
3142 zdiff = iz/delzinv[n];
3143 zk = (iz + nzhi_direct)*ny;
3144 for (iy = nylo_direct; iy <= nyhi_direct; iy++) {
3145 ydiff = iy/delyinv[n];
3146 zyk = (zk + iy + nyhi_direct)*nx;
3147 for (ix = nxlo_direct; ix <= nxhi_direct; ix++) {
3148 xdiff = ix/delxinv[n];
3149
3150 // transform grid point pair-wise distance from lamda (0-1) coords to box coords
3151
3152 if (triclinic) {
3153 tmp[0] = xdiff;
3154 tmp[1] = ydiff;
3155 tmp[2] = zdiff;
3156 lamda2xvector(&tmp[0],&tmp[0]);
3157 dx = tmp[0];
3158 dy = tmp[1];
3159 dz = tmp[2];
3160 } else {
3161 dx = xdiff;
3162 dy = ydiff;
3163 dz = zdiff;
3164 }
3165
3166 rsq = dx*dx + dy*dy + dz*dz;
3167
3168 rho = sqrt(rsq)/(two_n*a);
3169 k = zyk + ix + nxhi_direct;
3170 g_direct[n][k] = gamma(rho)/(two_n*a) - gamma(rho/2.0)/(2.0*two_n*a);
3171 }
3172 }
3173 }
3174 two_n *= 2.0;
3175 }
3176 }
3177
3178 /* ----------------------------------------------------------------------
3179 Compute direct interaction (virial) weights for intermediate grid levels
3180 ------------------------------------------------------------------------- */
3181
get_virial_direct()3182 void MSM::get_virial_direct()
3183 {
3184 if (v0_direct) memory->destroy(v0_direct);
3185 memory->create(v0_direct,levels,nmax_direct,"msm:v0_direct");
3186 if (v1_direct) memory->destroy(v1_direct);
3187 memory->create(v1_direct,levels,nmax_direct,"msm:v1_direct");
3188 if (v2_direct) memory->destroy(v2_direct);
3189 memory->create(v2_direct,levels,nmax_direct,"msm:v2_direct");
3190 if (v3_direct) memory->destroy(v3_direct);
3191 memory->create(v3_direct,levels,nmax_direct,"msm:v3_direct");
3192 if (v4_direct) memory->destroy(v4_direct);
3193 memory->create(v4_direct,levels,nmax_direct,"msm:v4_direct");
3194 if (v5_direct) memory->destroy(v5_direct);
3195 memory->create(v5_direct,levels,nmax_direct,"msm:v5_direct");
3196
3197 double a = cutoff;
3198 double a_sq = cutoff*cutoff;
3199
3200 int n,zk,zyk,k,ix,iy,iz;
3201 double xdiff,ydiff,zdiff;
3202 double dx,dy,dz;
3203 double tmp[3];
3204 double rsq,r,rho,two_n,two_nsq,dg;
3205
3206 two_n = 1.0;
3207
3208 int nx = nxhi_direct - nxlo_direct + 1;
3209 int ny = nyhi_direct - nylo_direct + 1;
3210
3211 for (n=0; n<levels; n++) {
3212 two_nsq = two_n * two_n;
3213
3214 for (iz = nzlo_direct; iz <= nzhi_direct; iz++) {
3215 zdiff = iz/delzinv[n];
3216 zk = (iz + nzhi_direct)*ny;
3217 for (iy = nylo_direct; iy <= nyhi_direct; iy++) {
3218 ydiff = iy/delyinv[n];
3219 zyk = (zk + iy + nyhi_direct)*nx;
3220 for (ix = nxlo_direct; ix <= nxhi_direct; ix++) {
3221 xdiff = ix/delxinv[n];
3222
3223 if (triclinic) {
3224 tmp[0] = xdiff;
3225 tmp[1] = ydiff;
3226 tmp[2] = zdiff;
3227 lamda2xvector(&tmp[0],&tmp[0]);
3228 dx = tmp[0];
3229 dy = tmp[1];
3230 dz = tmp[2];
3231 } else {
3232 dx = xdiff;
3233 dy = ydiff;
3234 dz = zdiff;
3235 }
3236
3237 rsq = dx*dx + dy*dy + dz*dz;
3238 k = zyk + ix + nxhi_direct;
3239 r = sqrt(rsq);
3240 if (r == 0) {
3241 v0_direct[n][k] = 0.0;
3242 v1_direct[n][k] = 0.0;
3243 v2_direct[n][k] = 0.0;
3244 v3_direct[n][k] = 0.0;
3245 v4_direct[n][k] = 0.0;
3246 v5_direct[n][k] = 0.0;
3247 } else {
3248 rho = r/(two_n*a);
3249 dg = -(dgamma(rho)/(two_nsq*a_sq) -
3250 dgamma(rho/2.0)/(4.0*two_nsq*a_sq))/r;
3251 v0_direct[n][k] = dg * dx * dx;
3252 v1_direct[n][k] = dg * dy * dy;
3253 v2_direct[n][k] = dg * dz * dz;
3254 v3_direct[n][k] = dg * dx * dy;
3255 v4_direct[n][k] = dg * dx * dz;
3256 v5_direct[n][k] = dg * dy * dz;
3257 }
3258
3259 }
3260 }
3261 }
3262 two_n *= 2.0;
3263 }
3264 }
3265
3266 /* ----------------------------------------------------------------------
3267 Compute direct interaction (energy) weights for top grid level
3268 (nonperiodic systems only)
3269 ------------------------------------------------------------------------- */
3270
get_g_direct_top(int n)3271 void MSM::get_g_direct_top(int n)
3272 {
3273 int nx_top = betax[n] - alpha[n];
3274 int ny_top = betay[n] - alpha[n];
3275 int nz_top = betaz[n] - alpha[n];
3276
3277 int nx = 2*nx_top + 1;
3278 int ny = 2*ny_top + 1;
3279 int nz = 2*nz_top + 1;
3280
3281 int nmax_top = 8*(nx+1)*(ny*1)*(nz+1);
3282
3283 if (g_direct_top) memory->destroy(g_direct_top);
3284 memory->create(g_direct_top,nmax_top,"msm:g_direct_top");
3285
3286 double a = cutoff;
3287
3288 int zk,zyk,k,ix,iy,iz;
3289 double xdiff,ydiff,zdiff;
3290 double dx,dy,dz;
3291 double tmp[3];
3292 double rsq,rho,two_n;
3293
3294 two_n = pow(2.0,n);
3295
3296 for (iz = -nz_top; iz <= nz_top; iz++) {
3297 zdiff = iz/delzinv[n];
3298 zk = (iz + nz_top)*ny;
3299 for (iy = -ny_top; iy <= ny_top; iy++) {
3300 ydiff = iy/delyinv[n];
3301 zyk = (zk + iy + ny_top)*nx;
3302 for (ix = -nx_top; ix <= nx_top; ix++) {
3303 xdiff = ix/delxinv[n];
3304
3305 if (triclinic) {
3306 tmp[0] = xdiff;
3307 tmp[1] = ydiff;
3308 tmp[2] = zdiff;
3309 lamda2xvector(&tmp[0],&tmp[0]);
3310 dx = tmp[0];
3311 dy = tmp[1];
3312 dz = tmp[2];
3313 } else {
3314 dx = xdiff;
3315 dy = ydiff;
3316 dz = zdiff;
3317 }
3318
3319 rsq = dx*dx + dy*dy + dz*dz;
3320 rho = sqrt(rsq)/(two_n*a);
3321 k = zyk + ix + nx_top;
3322 g_direct_top[k] = gamma(rho)/(two_n*a);
3323 }
3324 }
3325 }
3326 }
3327
3328 /* ----------------------------------------------------------------------
3329 Compute direct interaction (virial) weights for top grid level
3330 (nonperiodic systems only)
3331 ------------------------------------------------------------------------- */
3332
get_virial_direct_top(int n)3333 void MSM::get_virial_direct_top(int n)
3334 {
3335 int nx_top = betax[n] - alpha[n];
3336 int ny_top = betay[n] - alpha[n];
3337 int nz_top = betaz[n] - alpha[n];
3338
3339 int nx = 2*nx_top + 1;
3340 int ny = 2*ny_top + 1;
3341 int nz = 2*nz_top + 1;
3342
3343 int nmax_top = 8*(nx+1)*(ny*1)*(nz+1);
3344
3345 if (v0_direct_top) memory->destroy(v0_direct_top);
3346 memory->create(v0_direct_top,nmax_top,"msm:v0_direct_top");
3347 if (v1_direct_top) memory->destroy(v1_direct_top);
3348 memory->create(v1_direct_top,nmax_top,"msm:v1_direct_top");
3349 if (v2_direct_top) memory->destroy(v2_direct_top);
3350 memory->create(v2_direct_top,nmax_top,"msm:v2_direct_top");
3351 if (v3_direct_top) memory->destroy(v3_direct_top);
3352 memory->create(v3_direct_top,nmax_top,"msm:v3_direct_top");
3353 if (v4_direct_top) memory->destroy(v4_direct_top);
3354 memory->create(v4_direct_top,nmax_top,"msm:v4_direct_top");
3355 if (v5_direct_top) memory->destroy(v5_direct_top);
3356 memory->create(v5_direct_top,nmax_top,"msm:v5_direct_top");
3357
3358 double a = cutoff;
3359 double a_sq = cutoff*cutoff;
3360
3361 int zk,zyk,k,ix,iy,iz;
3362 double xdiff,ydiff,zdiff;
3363 double dx,dy,dz;
3364 double tmp[3];
3365 double rsq,r,rho,two_n,two_nsq,dg;
3366
3367 two_n = pow(2.0,n);
3368 two_nsq = two_n * two_n;
3369
3370 for (iz = -nz_top; iz <= nz_top; iz++) {
3371 zdiff = iz/delzinv[n];
3372 zk = (iz + nz_top)*ny;
3373 for (iy = -ny_top; iy <= ny_top; iy++) {
3374 ydiff = iy/delyinv[n];
3375 zyk = (zk + iy + ny_top)*nx;
3376 for (ix = -nx_top; ix <= nx_top; ix++) {
3377 xdiff = ix/delxinv[n];
3378 if (triclinic) {
3379 tmp[0] = xdiff;
3380 tmp[1] = ydiff;
3381 tmp[2] = zdiff;
3382 lamda2xvector(&tmp[0],&tmp[0]);
3383 dx = tmp[0];
3384 dy = tmp[1];
3385 dz = tmp[2];
3386 } else {
3387 dx = xdiff;
3388 dy = ydiff;
3389 dz = zdiff;
3390 }
3391
3392 rsq = dx*dx + dy*dy + dz*dz;
3393 k = zyk + ix + nx_top;
3394 r = sqrt(rsq);
3395 if (r == 0) {
3396 v0_direct_top[k] = 0.0;
3397 v1_direct_top[k] = 0.0;
3398 v2_direct_top[k] = 0.0;
3399 v3_direct_top[k] = 0.0;
3400 v4_direct_top[k] = 0.0;
3401 v5_direct_top[k] = 0.0;
3402 } else {
3403 rho = r/(two_n*a);
3404 dg = -(dgamma(rho)/(two_nsq*a_sq))/r;
3405 v0_direct_top[k] = dg * dx * dx;
3406 v1_direct_top[k] = dg * dy * dy;
3407 v2_direct_top[k] = dg * dz * dz;
3408 v3_direct_top[k] = dg * dx * dy;
3409 v4_direct_top[k] = dg * dx * dz;
3410 v5_direct_top[k] = dg * dy * dz;
3411 }
3412 }
3413 }
3414 }
3415 }
3416
3417 /* ----------------------------------------------------------------------
3418 memory usage of local arrays
3419 ------------------------------------------------------------------------- */
3420
memory_usage()3421 double MSM::memory_usage()
3422 {
3423 double bytes = 0;
3424
3425 // NOTE: Stan, fill in other memory allocations here
3426
3427 // all GridComm bufs
3428
3429 bytes += (double)(ngcall_buf1 + ngcall_buf2) * npergrid * sizeof(double);
3430
3431 for (int n=0; n<levels; n++)
3432 if (active_flag[n])
3433 bytes += (double)(ngc_buf1[n] + ngc_buf2[n]) * npergrid * sizeof(double);
3434
3435 return bytes;
3436 }
3437