1 // clang-format off
2 /* -------------------------------------------------------------------------
3    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4    https://www.lammps.org/, Sandia National Laboratories
5    Steve Plimpton, sjplimp@sandia.gov
6 
7    Copyright (2003) Sandia Corporation.  Under the terms of Contract
8    DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9    certain rights in this software.  This software is distributed under
10    the GNU General Public License.
11 
12    See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14 
15 /* ----------------------------------------------------------------------
16    Contributing author: Axel Kohlmeyer (Temple U)
17    OpenMP based threading support for LAMMPS
18 ------------------------------------------------------------------------- */
19 
20 #include <cstring>
21 
22 #include "atom.h"
23 #include "comm.h"
24 #include "error.h"
25 #include "force.h"
26 #include "modify.h"
27 #include "neighbor.h"
28 
29 
30 #include "thr_omp.h"
31 
32 #include "pair.h"
33 #include "bond.h"
34 #include "angle.h"
35 #include "dihedral.h"
36 #include "improper.h"
37 #include "compute.h"
38 
39 #include "math_const.h"
40 
41 using namespace LAMMPS_NS;
42 using namespace MathConst;
43 
44 /* ---------------------------------------------------------------------- */
45 
ThrOMP(LAMMPS * ptr,int style)46 ThrOMP::ThrOMP(LAMMPS *ptr, int style)
47   : lmp(ptr), fix(nullptr), thr_style(style), thr_error(0)
48 {
49   // register fix omp with this class
50   int ifix = lmp->modify->find_fix("package_omp");
51   if (ifix < 0)
52     lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles");
53   fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
54 }
55 
56 /* ---------------------------------------------------------------------- */
57 
~ThrOMP()58 ThrOMP::~ThrOMP()
59 {
60   // nothing to do?
61 }
62 
63 /* ----------------------------------------------------------------------
64    Hook up per thread per atom arrays into the tally infrastructure
65    ---------------------------------------------------------------------- */
66 
ev_setup_thr(int eflag,int vflag,int nall,double * eatom,double ** vatom,double ** cvatom,ThrData * thr)67 void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom,
68                           double **vatom, double **cvatom, ThrData *thr)
69 {
70   const int tid = thr->get_tid();
71   if (tid == 0) thr_error = 0;
72 
73   if (thr_style & THR_PAIR) {
74     if (eflag & ENERGY_ATOM) {
75       thr->eatom_pair = eatom + tid*nall;
76       if (nall > 0)
77         memset(&(thr->eatom_pair[0]),0,nall*sizeof(double));
78     }
79     // per-atom virial and per-atom centroid virial are the same for two-body
80     // many-body pair styles not yet implemented
81     if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
82       thr->vatom_pair = vatom + tid*nall;
83       if (nall > 0)
84         memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double));
85     }
86     // check cvatom_pair, because can't access centroidstressflag
87     if ((vflag & VIRIAL_CENTROID) && cvatom) {
88       thr->cvatom_pair = cvatom + tid*nall;
89       if (nall > 0)
90         memset(&(thr->cvatom_pair[0][0]),0,nall*9*sizeof(double));
91     } else {
92       thr->cvatom_pair = nullptr;
93     }
94 
95   }
96 
97   if (thr_style & THR_BOND) {
98     if (eflag & ENERGY_ATOM) {
99       thr->eatom_bond = eatom + tid*nall;
100       if (nall > 0)
101         memset(&(thr->eatom_bond[0]),0,nall*sizeof(double));
102     }
103     // per-atom virial and per-atom centroid virial are the same for bonds
104     if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
105       thr->vatom_bond = vatom + tid*nall;
106       if (nall > 0)
107         memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double));
108     }
109   }
110 
111   if (thr_style & THR_ANGLE) {
112     if (eflag & ENERGY_ATOM) {
113       thr->eatom_angle = eatom + tid*nall;
114       if (nall > 0)
115         memset(&(thr->eatom_angle[0]),0,nall*sizeof(double));
116     }
117     if (vflag & VIRIAL_ATOM) {
118       thr->vatom_angle = vatom + tid*nall;
119       if (nall > 0)
120         memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double));
121     }
122     if (vflag & VIRIAL_CENTROID) {
123       thr->cvatom_angle = cvatom + tid*nall;
124       if (nall > 0)
125         memset(&(thr->cvatom_angle[0][0]),0,nall*9*sizeof(double));
126     }
127   }
128 
129   if (thr_style & THR_DIHEDRAL) {
130     if (eflag & ENERGY_ATOM) {
131       thr->eatom_dihed = eatom + tid*nall;
132       if (nall > 0)
133         memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double));
134     }
135     if (vflag & VIRIAL_ATOM) {
136       thr->vatom_dihed = vatom + tid*nall;
137       if (nall > 0)
138         memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double));
139     }
140     if (vflag & VIRIAL_CENTROID) {
141       thr->cvatom_dihed = cvatom + tid*nall;
142       if (nall > 0)
143         memset(&(thr->cvatom_dihed[0][0]),0,nall*9*sizeof(double));
144     }
145   }
146 
147   if (thr_style & THR_IMPROPER) {
148     if (eflag & ENERGY_ATOM) {
149       thr->eatom_imprp = eatom + tid*nall;
150       if (nall > 0)
151         memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double));
152     }
153     if (vflag & VIRIAL_ATOM) {
154       thr->vatom_imprp = vatom + tid*nall;
155       if (nall > 0)
156         memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double));
157     }
158     if (vflag & VIRIAL_CENTROID) {
159       thr->cvatom_imprp = cvatom + tid*nall;
160       if (nall > 0)
161         memset(&(thr->cvatom_imprp[0][0]),0,nall*9*sizeof(double));
162     }
163   }
164 
165   // nothing to do for THR_KSPACE
166 }
167 
168 /* ----------------------------------------------------------------------
169    Reduce per thread data into the regular structures
170    Reduction of global properties is serialized with a "critical"
171    directive, so that only one thread at a time will access the
172    global variables. Since we are not synchronized, this should
173    come with little overhead. The reduction of per-atom properties
174    in contrast is parallelized over threads in the same way as forces.
175    ---------------------------------------------------------------------- */
176 
reduce_thr(void * style,const int eflag,const int vflag,ThrData * const thr)177 void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
178                         ThrData *const thr)
179 {
180   const int nlocal = lmp->atom->nlocal;
181   const int nghost = lmp->atom->nghost;
182   const int nall = nlocal + nghost;
183   const int nfirst = lmp->atom->nfirst;
184   const int nthreads = lmp->comm->nthreads;
185   const int evflag = eflag | vflag;
186 
187   const int tid = thr->get_tid();
188   double **f = lmp->atom->f;
189   double **x = lmp->atom->x;
190 
191   int need_force_reduce = 1;
192 
193   if (evflag)
194     sync_threads();
195 
196   switch (thr_style) {
197 
198   case THR_PAIR: {
199 
200     if (lmp->force->pair->vflag_fdotr) {
201 
202       // this is a non-hybrid pair style. compute per thread fdotr
203       if (fix->last_pair_hybrid == nullptr) {
204         if (lmp->neighbor->includegroup == 0)
205           thr->virial_fdotr_compute(x, nlocal, nghost, -1);
206         else
207           thr->virial_fdotr_compute(x, nlocal, nghost, nfirst);
208       } else {
209         if (style == fix->last_pair_hybrid) {
210           // pair_style hybrid will compute fdotr for us
211           // but we first need to reduce the forces
212           data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
213           fix->did_reduce();
214           need_force_reduce = 0;
215         }
216       }
217     }
218 
219     if (evflag) {
220       Pair * const pair = (Pair *)style;
221 
222 #if defined(_OPENMP)
223 #pragma omp critical
224 #endif
225       {
226         if (eflag & ENERGY_GLOBAL) {
227           pair->eng_vdwl += thr->eng_vdwl;
228           pair->eng_coul += thr->eng_coul;
229           thr->eng_vdwl = 0.0;
230           thr->eng_coul = 0.0;
231         }
232         if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR))
233           for (int i=0; i < 6; ++i) {
234             pair->virial[i] += thr->virial_pair[i];
235             thr->virial_pair[i] = 0.0;
236           }
237       }
238 
239       if (eflag & ENERGY_ATOM) {
240         data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
241       }
242       // per-atom virial and per-atom centroid virial are the same for two-body
243       // many-body pair styles not yet implemented
244       if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
245         data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
246       }
247       // check cvatom_pair, because can't access centroidstressflag
248       if ((vflag & VIRIAL_CENTROID) && thr->cvatom_pair) {
249         data_reduce_thr(&(pair->cvatom[0][0]), nall, nthreads, 9, tid);
250       }
251     }
252   }
253     break;
254 
255   case THR_BOND:
256 
257     if (evflag) {
258       Bond * const bond = lmp->force->bond;
259 #if defined(_OPENMP)
260 #pragma omp critical
261 #endif
262       {
263         if (eflag & ENERGY_GLOBAL) {
264           bond->energy += thr->eng_bond;
265           thr->eng_bond = 0.0;
266         }
267 
268         if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
269           for (int i=0; i < 6; ++i) {
270             bond->virial[i] += thr->virial_bond[i];
271             thr->virial_bond[i] = 0.0;
272           }
273         }
274       }
275 
276       if (eflag & ENERGY_ATOM) {
277         data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid);
278       }
279       // per-atom virial and per-atom centroid virial are the same for bonds
280       if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
281         data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid);
282       }
283 
284     }
285     break;
286 
287   case THR_ANGLE:
288 
289     if (evflag) {
290       Angle * const angle = lmp->force->angle;
291 #if defined(_OPENMP)
292 #pragma omp critical
293 #endif
294       {
295         if (eflag & ENERGY_GLOBAL) {
296           angle->energy += thr->eng_angle;
297           thr->eng_angle = 0.0;
298         }
299 
300         if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
301           for (int i=0; i < 6; ++i) {
302             angle->virial[i] += thr->virial_angle[i];
303             thr->virial_angle[i] = 0.0;
304           }
305         }
306       }
307 
308       if (eflag & ENERGY_ATOM) {
309         data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid);
310       }
311       if (vflag & VIRIAL_ATOM) {
312         data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid);
313       }
314       if (vflag & VIRIAL_CENTROID) {
315         data_reduce_thr(&(angle->cvatom[0][0]), nall, nthreads, 9, tid);
316       }
317 
318     }
319     break;
320 
321   case THR_DIHEDRAL:
322 
323     if (evflag) {
324       Dihedral * const dihedral = lmp->force->dihedral;
325 #if defined(_OPENMP)
326 #pragma omp critical
327 #endif
328       {
329         if (eflag & ENERGY_GLOBAL) {
330           dihedral->energy += thr->eng_dihed;
331           thr->eng_dihed = 0.0;
332         }
333 
334         if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
335           for (int i=0; i < 6; ++i) {
336             dihedral->virial[i] += thr->virial_dihed[i];
337             thr->virial_dihed[i] = 0.0;
338           }
339         }
340       }
341 
342       if (eflag & ENERGY_ATOM) {
343         data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
344       }
345       if (vflag & VIRIAL_ATOM) {
346         data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
347       }
348       if (vflag & VIRIAL_CENTROID) {
349         data_reduce_thr(&(dihedral->cvatom[0][0]), nall, nthreads, 9, tid);
350       }
351 
352     }
353     break;
354 
355   case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals
356 
357     if (evflag) {
358       Dihedral * const dihedral = lmp->force->dihedral;
359       Pair * const pair = lmp->force->pair;
360 #if defined(_OPENMP)
361 #pragma omp critical
362 #endif
363       {
364         if (eflag & ENERGY_GLOBAL) {
365           dihedral->energy += thr->eng_dihed;
366           pair->eng_vdwl += thr->eng_vdwl;
367           pair->eng_coul += thr->eng_coul;
368           thr->eng_dihed = 0.0;
369           thr->eng_vdwl = 0.0;
370           thr->eng_coul = 0.0;
371         }
372 
373         if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
374           for (int i=0; i < 6; ++i) {
375             dihedral->virial[i] += thr->virial_dihed[i];
376             pair->virial[i] += thr->virial_pair[i];
377             thr->virial_dihed[i] = 0.0;
378             thr->virial_pair[i] = 0.0;
379           }
380         }
381       }
382 
383       if (eflag & ENERGY_ATOM) {
384         data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
385         data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
386       }
387       if (vflag & VIRIAL_ATOM) {
388         data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
389       }
390       if (vflag & VIRIAL_CENTROID) {
391         data_reduce_thr(&(dihedral->cvatom[0][0]), nall, nthreads, 9, tid);
392       }
393       // per-atom virial and per-atom centroid virial are the same for two-body
394       // many-body pair styles not yet implemented
395       if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
396         data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
397       }
398       // check cvatom_pair, because can't access centroidstressflag
399       if ((vflag & VIRIAL_CENTROID) && thr->cvatom_pair) {
400         data_reduce_thr(&(pair->cvatom[0][0]), nall, nthreads, 9, tid);
401       }
402     }
403     break;
404 
405   case THR_IMPROPER:
406 
407     if (evflag) {
408       Improper *improper = lmp->force->improper;
409 #if defined(_OPENMP)
410 #pragma omp critical
411 #endif
412       {
413         if (eflag & ENERGY_GLOBAL) {
414           improper->energy += thr->eng_imprp;
415           thr->eng_imprp = 0.0;
416         }
417 
418         if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
419           for (int i=0; i < 6; ++i) {
420             improper->virial[i] += thr->virial_imprp[i];
421             thr->virial_imprp[i] = 0.0;
422           }
423         }
424       }
425 
426       if (eflag & ENERGY_ATOM) {
427         data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid);
428       }
429       if (vflag & VIRIAL_ATOM) {
430         data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid);
431       }
432       if (vflag & VIRIAL_CENTROID) {
433         data_reduce_thr(&(improper->cvatom[0][0]), nall, nthreads, 9, tid);
434       }
435 
436     }
437     break;
438 
439   case THR_KSPACE:
440     // nothing to do. XXX may need to add support for per-atom info
441     break;
442 
443   case THR_INTGR:
444     // nothing to do
445     break;
446 
447   default:
448     printf("tid:%d unhandled thr_style case %d\n", tid, thr_style);
449     break;
450   }
451 
452   if (style == fix->last_omp_style) {
453     if (need_force_reduce) {
454       data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
455       fix->did_reduce();
456     }
457 
458     if (lmp->atom->torque)
459       data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);
460   }
461   thr->timer(Timer::COMM);
462 }
463 
464 /* ----------------------------------------------------------------------
465    tally eng_vdwl and eng_coul into per thread global and per-atom accumulators
466 ------------------------------------------------------------------------- */
467 
e_tally_thr(Pair * const pair,const int i,const int j,const int nlocal,const int newton_pair,const double evdwl,const double ecoul,ThrData * const thr)468 void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j,
469                          const int nlocal, const int newton_pair,
470                          const double evdwl, const double ecoul, ThrData * const thr)
471 {
472   if (pair->eflag_global) {
473     if (newton_pair) {
474       thr->eng_vdwl += evdwl;
475       thr->eng_coul += ecoul;
476     } else {
477       const double evdwlhalf = 0.5*evdwl;
478       const double ecoulhalf = 0.5*ecoul;
479       if (i < nlocal) {
480         thr->eng_vdwl += evdwlhalf;
481         thr->eng_coul += ecoulhalf;
482       }
483       if (j < nlocal) {
484         thr->eng_vdwl += evdwlhalf;
485         thr->eng_coul += ecoulhalf;
486       }
487     }
488   }
489   if (pair->eflag_atom && thr->eatom_pair) {
490     const double epairhalf = 0.5 * (evdwl + ecoul);
491     if (newton_pair || i < nlocal) thr->eatom_pair[i] += epairhalf;
492     if (newton_pair || j < nlocal) thr->eatom_pair[j] += epairhalf;
493   }
494 }
495 
496 /* helper functions */
v_tally(double * const vout,const double * const vin)497 static void v_tally(double * const vout, const double * const vin)
498 {
499   vout[0] += vin[0];
500   vout[1] += vin[1];
501   vout[2] += vin[2];
502   vout[3] += vin[3];
503   vout[4] += vin[4];
504   vout[5] += vin[5];
505 }
506 
v_tally9(double * const vout,const double * const vin)507 static void v_tally9(double * const vout, const double * const vin)
508 {
509   vout[0] += vin[0];
510   vout[1] += vin[1];
511   vout[2] += vin[2];
512   vout[3] += vin[3];
513   vout[4] += vin[4];
514   vout[5] += vin[5];
515   vout[6] += vin[6];
516   vout[7] += vin[7];
517   vout[8] += vin[8];
518 }
519 
v_tally(double * const vout,const double scale,const double * const vin)520 static void v_tally(double * const vout, const double scale, const double * const vin)
521 {
522   vout[0] += scale*vin[0];
523   vout[1] += scale*vin[1];
524   vout[2] += scale*vin[2];
525   vout[3] += scale*vin[3];
526   vout[4] += scale*vin[4];
527   vout[5] += scale*vin[5];
528 }
529 
530 /* ----------------------------------------------------------------------
531    tally virial into per thread global and per-atom accumulators
532 ------------------------------------------------------------------------- */
v_tally_thr(Pair * const pair,const int i,const int j,const int nlocal,const int newton_pair,const double * const v,ThrData * const thr)533 void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j,
534                          const int nlocal, const int newton_pair,
535                          const double * const v, ThrData * const thr)
536 {
537   if (pair->vflag_global) {
538     double * const va = thr->virial_pair;
539     if (newton_pair) {
540       v_tally(va,v);
541     } else {
542       if (i < nlocal) v_tally(va,0.5,v);
543       if (j < nlocal) v_tally(va,0.5,v);
544     }
545   }
546 
547   if (pair->vflag_atom) {
548     if (newton_pair || i < nlocal) {
549       double * const va = thr->vatom_pair[i];
550       v_tally(va,0.5,v);
551     }
552     if (newton_pair || j < nlocal) {
553       double * const va = thr->vatom_pair[j];
554       v_tally(va,0.5,v);
555     }
556   }
557 }
558 
559 /* ----------------------------------------------------------------------
560    tally eng_vdwl and virial into per thread global and per-atom accumulators
561    need i < nlocal test since called by bond_quartic and dihedral_charmm
562 ------------------------------------------------------------------------- */
563 
ev_tally_thr(Pair * const pair,const int i,const int j,const int nlocal,const int newton_pair,const double evdwl,const double ecoul,const double fpair,const double delx,const double dely,const double delz,ThrData * const thr)564 void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal,
565                           const int newton_pair, const double evdwl, const double ecoul,
566                           const double fpair, const double delx, const double dely,
567                           const double delz, ThrData * const thr)
568 {
569 
570   if (pair->eflag_either)
571     e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
572 
573   if (pair->vflag_either) {
574     double v[6];
575     v[0] = delx*delx*fpair;
576     v[1] = dely*dely*fpair;
577     v[2] = delz*delz*fpair;
578     v[3] = delx*dely*fpair;
579     v[4] = delx*delz*fpair;
580     v[5] = dely*delz*fpair;
581 
582     v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr);
583   }
584 
585   if (pair->num_tally_compute > 0) {
586     // ev_tally callbacks are not thread safe and thus have to be protected
587 #if defined(_OPENMP)
588 #pragma omp critical
589 #endif
590     for (int k=0; k < pair->num_tally_compute; ++k) {
591       Compute *c = pair->list_tally_compute[k];
592       c->pair_tally_callback(i, j, nlocal, newton_pair,
593                              evdwl, ecoul, fpair, delx, dely, delz);
594     }
595   }
596 }
597 
598 /* ----------------------------------------------------------------------
599    tally eng_vdwl and virial into global and per-atom accumulators
600    for virial, have delx,dely,delz and fx,fy,fz
601 ------------------------------------------------------------------------- */
602 
ev_tally_xyz_thr(Pair * const pair,const int i,const int j,const int nlocal,const int newton_pair,const double evdwl,const double ecoul,const double fx,const double fy,const double fz,const double delx,const double dely,const double delz,ThrData * const thr)603 void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j,
604                               const int nlocal, const int newton_pair,
605                               const double evdwl, const double ecoul,
606                               const double fx, const double fy, const double fz,
607                               const double delx, const double dely, const double delz,
608                               ThrData * const thr)
609 {
610 
611   if (pair->eflag_either)
612     e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
613 
614   if (pair->vflag_either) {
615     double v[6];
616     v[0] = delx*fx;
617     v[1] = dely*fy;
618     v[2] = delz*fz;
619     v[3] = delx*fy;
620     v[4] = delx*fz;
621     v[5] = dely*fz;
622 
623     v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr);
624   }
625 }
626 
627 
628 /* ----------------------------------------------------------------------
629    tally eng_vdwl and virial into global and per-atom accumulators
630    for virial, have delx,dely,delz and fx,fy,fz
631    called when using full neighbor lists
632 ------------------------------------------------------------------------- */
633 
ev_tally_xyz_full_thr(Pair * const pair,const int i,const double evdwl,const double ecoul,const double fx,const double fy,const double fz,const double delx,const double dely,const double delz,ThrData * const thr)634 void ThrOMP::ev_tally_xyz_full_thr(Pair * const pair, const int i,
635                                    const double evdwl, const double ecoul,
636                                    const double fx, const double fy,
637                                    const double fz, const double delx,
638                                    const double dely, const double delz,
639                                    ThrData * const thr)
640 {
641 
642   if (pair->eflag_either)
643     e_tally_thr(pair,i,i,i+1,0,0.5*evdwl,ecoul,thr);
644 
645   if (pair->vflag_either) {
646     double v[6];
647     v[0] = 0.5*delx*fx;
648     v[1] = 0.5*dely*fy;
649     v[2] = 0.5*delz*fz;
650     v[3] = 0.5*delx*fy;
651     v[4] = 0.5*delx*fz;
652     v[5] = 0.5*dely*fz;
653 
654     v_tally_thr(pair,i,i,i+1,0,v,thr);
655   }
656 }
657 
658 /* ----------------------------------------------------------------------
659    tally eng_vdwl and virial into global and per-atom accumulators
660    called by SW and hbond potentials, newton_pair is always on
661    virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
662  ------------------------------------------------------------------------- */
663 
ev_tally3_thr(Pair * const pair,const int i,const int j,const int k,const double evdwl,const double ecoul,const double * const fj,const double * const fk,const double * const drji,const double * const drki,ThrData * const thr)664 void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k,
665                            const double evdwl, const double ecoul,
666                            const double * const fj, const double * const fk,
667                            const double * const drji, const double * const drki,
668                            ThrData * const thr)
669 {
670   if (pair->eflag_either) {
671     if (pair->eflag_global) {
672       thr->eng_vdwl += evdwl;
673       thr->eng_coul += ecoul;
674     }
675     if (pair->eflag_atom) {
676       const double epairthird = THIRD * (evdwl + ecoul);
677       thr->eatom_pair[i] += epairthird;
678       thr->eatom_pair[j] += epairthird;
679       thr->eatom_pair[k] += epairthird;
680     }
681   }
682 
683   if (pair->vflag_either) {
684     double v[6];
685 
686     v[0] = drji[0]*fj[0] + drki[0]*fk[0];
687     v[1] = drji[1]*fj[1] + drki[1]*fk[1];
688     v[2] = drji[2]*fj[2] + drki[2]*fk[2];
689     v[3] = drji[0]*fj[1] + drki[0]*fk[1];
690     v[4] = drji[0]*fj[2] + drki[0]*fk[2];
691     v[5] = drji[1]*fj[2] + drki[1]*fk[2];
692 
693     if (pair->vflag_global) v_tally(thr->virial_pair,v);
694 
695     if (pair->vflag_atom) {
696       v_tally(thr->vatom_pair[i],THIRD,v);
697       v_tally(thr->vatom_pair[j],THIRD,v);
698       v_tally(thr->vatom_pair[k],THIRD,v);
699     }
700   }
701 }
702 
703 /* ----------------------------------------------------------------------
704    tally eng_vdwl and virial into global and per-atom accumulators
705    called by AIREBO potential, newton_pair is always on
706  ------------------------------------------------------------------------- */
707 
ev_tally4_thr(Pair * const pair,const int i,const int j,const int k,const int m,const double evdwl,const double * const fi,const double * const fj,const double * const fk,const double * const drim,const double * const drjm,const double * const drkm,ThrData * const thr)708 void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j,
709                            const int k, const int m, const double evdwl,
710                            const double * const fi, const double * const fj,
711                            const double * const fk, const double * const drim,
712                            const double * const drjm, const double * const drkm,
713                            ThrData * const thr)
714 {
715   double v[6];
716 
717   if (pair->eflag_either) {
718     if (pair->eflag_global) thr->eng_vdwl += evdwl;
719     if (pair->eflag_atom) {
720       const double epairfourth = 0.25 * evdwl;
721       thr->eatom_pair[i] += epairfourth;
722       thr->eatom_pair[j] += epairfourth;
723       thr->eatom_pair[k] += epairfourth;
724       thr->eatom_pair[m] += epairfourth;
725     }
726   }
727 
728   if (pair->vflag_either) {
729     v[0] = (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]);
730     v[1] = (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]);
731     v[2] = (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]);
732     v[3] = (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]);
733     v[4] = (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
734     v[5] = (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
735     if (pair->vflag_global) v_tally(thr->virial_pair,v);
736 
737     if (pair->vflag_atom) {
738       v[0] *= 0.25;
739       v[1] *= 0.25;
740       v[2] *= 0.25;
741       v[3] *= 0.25;
742       v[4] *= 0.25;
743       v[5] *= 0.25;
744       v_tally(thr->vatom_pair[i],v);
745       v_tally(thr->vatom_pair[j],v);
746       v_tally(thr->vatom_pair[k],v);
747       v_tally(thr->vatom_pair[m],v);
748     }
749   }
750 }
751 
752 /* ----------------------------------------------------------------------
753    tally ecoul and virial into each of n atoms in list
754    called by TIP4P potential, newton_pair is always on
755    changes v values by dividing by n
756  ------------------------------------------------------------------------- */
757 
ev_tally_list_thr(Pair * const pair,const int key,const int * const list,const double * const v,const double ecoul,const double alpha,ThrData * const thr)758 void ThrOMP::ev_tally_list_thr(Pair * const pair, const int key,
759                                const int * const list, const double * const v,
760                                const double ecoul, const double alpha,
761                                ThrData * const thr)
762 {
763   int i;
764   if (pair->eflag_either) {
765     if (pair->eflag_global) thr->eng_coul += ecoul;
766     if (pair->eflag_atom) {
767       if (key == 0) {
768         thr->eatom_pair[list[0]] += 0.5*ecoul;
769         thr->eatom_pair[list[1]] += 0.5*ecoul;
770       } else if (key == 1) {
771         thr->eatom_pair[list[0]] += 0.5*ecoul*(1-alpha);
772         thr->eatom_pair[list[1]] += 0.25*ecoul*alpha;
773         thr->eatom_pair[list[2]] += 0.25*ecoul*alpha;
774         thr->eatom_pair[list[3]] += 0.5*ecoul;
775       } else if (key == 2) {
776         thr->eatom_pair[list[0]] += 0.5*ecoul;
777         thr->eatom_pair[list[1]] += 0.5*ecoul*(1-alpha);
778         thr->eatom_pair[list[2]] += 0.25*ecoul*alpha;
779         thr->eatom_pair[list[3]] += 0.25*ecoul*alpha;
780       } else {
781         thr->eatom_pair[list[0]] += 0.5*ecoul*(1-alpha);
782         thr->eatom_pair[list[1]] += 0.25*ecoul*alpha;
783         thr->eatom_pair[list[2]] += 0.25*ecoul*alpha;
784         thr->eatom_pair[list[3]] += 0.5*ecoul*(1-alpha);
785         thr->eatom_pair[list[4]] += 0.25*ecoul*alpha;
786         thr->eatom_pair[list[5]] += 0.25*ecoul*alpha;
787       }
788     }
789   }
790 
791   if (pair->vflag_either) {
792     if (pair->vflag_global)
793       v_tally(thr->virial_pair,v);
794 
795     if (pair->vflag_atom) {
796       if (key == 0) {
797         for (i = 0; i <= 5; i++) {
798           thr->vatom_pair[list[0]][i] += 0.5*v[i];
799           thr->vatom_pair[list[1]][i] += 0.5*v[i];
800         }
801       } else if (key == 1) {
802         for (i = 0; i <= 5; i++) {
803           thr->vatom_pair[list[0]][i] += 0.5*v[i]*(1-alpha);
804           thr->vatom_pair[list[1]][i] += 0.25*v[i]*alpha;
805           thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha;
806           thr->vatom_pair[list[3]][i] += 0.5*v[i];
807         }
808       } else if (key == 2) {
809         for (i = 0; i <= 5; i++) {
810           thr->vatom_pair[list[0]][i] += 0.5*v[i];
811           thr->vatom_pair[list[1]][i] += 0.5*v[i]*(1-alpha);
812           thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha;
813           thr->vatom_pair[list[3]][i] += 0.25*v[i]*alpha;
814         }
815       } else {
816         for (i = 0; i <= 5; i++) {
817           thr->vatom_pair[list[0]][i] += 0.5*v[i]*(1-alpha);
818           thr->vatom_pair[list[1]][i] += 0.25*v[i]*alpha;
819           thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha;
820           thr->vatom_pair[list[3]][i] += 0.5*v[i]*(1-alpha);
821           thr->vatom_pair[list[4]][i] += 0.25*v[i]*alpha;
822           thr->vatom_pair[list[5]][i] += 0.25*v[i]*alpha;
823         }
824       }
825     }
826   }
827 }
828 
829 /* ----------------------------------------------------------------------
830    tally energy and virial into global and per-atom accumulators
831 ------------------------------------------------------------------------- */
832 
ev_tally_thr(Bond * const bond,const int i,const int j,const int nlocal,const int newton_bond,const double ebond,const double fbond,const double delx,const double dely,const double delz,ThrData * const thr)833 void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal,
834                           const int newton_bond, const double ebond, const double fbond,
835                           const double delx, const double dely, const double delz,
836                           ThrData * const thr)
837 {
838   if (bond->eflag_either) {
839     const double ebondhalf = 0.5*ebond;
840     if (newton_bond) {
841       if (bond->eflag_global)
842         thr->eng_bond += ebond;
843       if (bond->eflag_atom) {
844         thr->eatom_bond[i] += ebondhalf;
845         thr->eatom_bond[j] += ebondhalf;
846       }
847     } else {
848       if (bond->eflag_global) {
849         if (i < nlocal) thr->eng_bond += ebondhalf;
850         if (j < nlocal) thr->eng_bond += ebondhalf;
851       }
852       if (bond->eflag_atom) {
853         if (i < nlocal) thr->eatom_bond[i] += ebondhalf;
854         if (j < nlocal) thr->eatom_bond[j] += ebondhalf;
855       }
856     }
857   }
858 
859   if (bond->vflag_either) {
860     double v[6];
861 
862     v[0] = delx*delx*fbond;
863     v[1] = dely*dely*fbond;
864     v[2] = delz*delz*fbond;
865     v[3] = delx*dely*fbond;
866     v[4] = delx*delz*fbond;
867     v[5] = dely*delz*fbond;
868 
869     if (bond->vflag_global) {
870       if (newton_bond)
871         v_tally(thr->virial_bond,v);
872       else {
873         if (i < nlocal)
874           v_tally(thr->virial_bond,0.5,v);
875         if (j < nlocal)
876           v_tally(thr->virial_bond,0.5,v);
877       }
878     }
879 
880     if (bond->vflag_atom) {
881       v[0] *= 0.5;
882       v[1] *= 0.5;
883       v[2] *= 0.5;
884       v[3] *= 0.5;
885       v[4] *= 0.5;
886       v[5] *= 0.5;
887 
888       if (newton_bond) {
889         v_tally(thr->vatom_bond[i],v);
890         v_tally(thr->vatom_bond[j],v);
891       } else {
892         if (i < nlocal)
893           v_tally(thr->vatom_bond[i],v);
894         if (j < nlocal)
895           v_tally(thr->vatom_bond[j],v);
896       }
897     }
898   }
899 }
900 
901 /* ----------------------------------------------------------------------
902    tally energy and virial into global and per-atom accumulators
903    virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3
904 ------------------------------------------------------------------------- */
905 
ev_tally_thr(Angle * const angle,const int i,const int j,const int k,const int nlocal,const int newton_bond,const double eangle,const double * const f1,const double * const f3,const double delx1,const double dely1,const double delz1,const double delx2,const double dely2,const double delz2,ThrData * const thr)906 void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k,
907                           const int nlocal, const int newton_bond, const double eangle,
908                           const double * const f1, const double * const f3,
909                           const double delx1, const double dely1, const double delz1,
910                           const double delx2, const double dely2, const double delz2,
911                           ThrData * const thr)
912 {
913   if (angle->eflag_either) {
914     const double eanglethird = THIRD*eangle;
915     if (newton_bond) {
916       if (angle->eflag_global)
917         thr->eng_angle += eangle;
918       if (angle->eflag_atom) {
919         thr->eatom_angle[i] += eanglethird;
920         thr->eatom_angle[j] += eanglethird;
921         thr->eatom_angle[k] += eanglethird;
922       }
923     } else {
924       if (angle->eflag_global) {
925         if (i < nlocal) thr->eng_angle += eanglethird;
926         if (j < nlocal) thr->eng_angle += eanglethird;
927         if (k < nlocal) thr->eng_angle += eanglethird;
928       }
929       if (angle->eflag_atom) {
930         if (i < nlocal) thr->eatom_angle[i] += eanglethird;
931         if (j < nlocal) thr->eatom_angle[j] += eanglethird;
932         if (k < nlocal) thr->eatom_angle[k] += eanglethird;
933       }
934     }
935   }
936 
937   if (angle->vflag_either) {
938     double v[6];
939 
940     v[0] = delx1*f1[0] + delx2*f3[0];
941     v[1] = dely1*f1[1] + dely2*f3[1];
942     v[2] = delz1*f1[2] + delz2*f3[2];
943     v[3] = delx1*f1[1] + delx2*f3[1];
944     v[4] = delx1*f1[2] + delx2*f3[2];
945     v[5] = dely1*f1[2] + dely2*f3[2];
946 
947     if (angle->vflag_global) {
948       if (newton_bond) {
949         v_tally(thr->virial_angle,v);
950       } else {
951         int cnt = 0;
952         if (i < nlocal) ++cnt;
953         if (j < nlocal) ++cnt;
954         if (k < nlocal) ++cnt;
955         v_tally(thr->virial_angle,cnt*THIRD,v);
956       }
957     }
958 
959     if (angle->vflag_atom) {
960       v[0] *= THIRD;
961       v[1] *= THIRD;
962       v[2] *= THIRD;
963       v[3] *= THIRD;
964       v[4] *= THIRD;
965       v[5] *= THIRD;
966 
967       if (newton_bond) {
968         v_tally(thr->vatom_angle[i],v);
969         v_tally(thr->vatom_angle[j],v);
970         v_tally(thr->vatom_angle[k],v);
971       } else {
972         if (i < nlocal) v_tally(thr->vatom_angle[i],v);
973         if (j < nlocal) v_tally(thr->vatom_angle[j],v);
974         if (k < nlocal) v_tally(thr->vatom_angle[k],v);
975       }
976     }
977   }
978 
979   // per-atom centroid virial
980   if (angle->cvflag_atom) {
981     double f2[3], v1[9], v2[9], v3[9];
982     double a1[3], a2[3], a3[3];
983 
984     // r0 = (r1+r2+r3)/3
985     // rij = ri-rj
986     // total virial = r10*f1 + r20*f2 + r30*f3
987     // del1: r12
988     // del2: r32
989 
990     // a1 = r10 = (2*r12 -   r32)/3
991     a1[0] = THIRD*(2*delx1-delx2);
992     a1[1] = THIRD*(2*dely1-dely2);
993     a1[2] = THIRD*(2*delz1-delz2);
994 
995     // a2 = r20 = ( -r12 -   r32)/3
996     a2[0] = THIRD*(-delx1-delx2);
997     a2[1] = THIRD*(-dely1-dely2);
998     a2[2] = THIRD*(-delz1-delz2);
999 
1000     // a3 = r30 = ( -r12 + 2*r32)/3
1001     a3[0] = THIRD*(-delx1+2*delx2);
1002     a3[1] = THIRD*(-dely1+2*dely2);
1003     a3[2] = THIRD*(-delz1+2*delz2);
1004 
1005     f2[0] = - f1[0] - f3[0];
1006     f2[1] = - f1[1] - f3[1];
1007     f2[2] = - f1[2] - f3[2];
1008 
1009     v1[0] = a1[0]*f1[0];
1010     v1[1] = a1[1]*f1[1];
1011     v1[2] = a1[2]*f1[2];
1012     v1[3] = a1[0]*f1[1];
1013     v1[4] = a1[0]*f1[2];
1014     v1[5] = a1[1]*f1[2];
1015     v1[6] = a1[1]*f1[0];
1016     v1[7] = a1[2]*f1[0];
1017     v1[8] = a1[2]*f1[1];
1018 
1019     v2[0] = a2[0]*f2[0];
1020     v2[1] = a2[1]*f2[1];
1021     v2[2] = a2[2]*f2[2];
1022     v2[3] = a2[0]*f2[1];
1023     v2[4] = a2[0]*f2[2];
1024     v2[5] = a2[1]*f2[2];
1025     v2[6] = a2[1]*f2[0];
1026     v2[7] = a2[2]*f2[0];
1027     v2[8] = a2[2]*f2[1];
1028 
1029     v3[0] = a3[0]*f3[0];
1030     v3[1] = a3[1]*f3[1];
1031     v3[2] = a3[2]*f3[2];
1032     v3[3] = a3[0]*f3[1];
1033     v3[4] = a3[0]*f3[2];
1034     v3[5] = a3[1]*f3[2];
1035     v3[6] = a3[1]*f3[0];
1036     v3[7] = a3[2]*f3[0];
1037     v3[8] = a3[2]*f3[1];
1038 
1039     if (newton_bond) {
1040       v_tally9(thr->cvatom_angle[i],v1);
1041       v_tally9(thr->cvatom_angle[j],v2);
1042       v_tally9(thr->cvatom_angle[k],v3);
1043     } else {
1044       if (i < nlocal) v_tally9(thr->cvatom_angle[i],v1);
1045       if (j < nlocal) v_tally9(thr->cvatom_angle[j],v2);
1046       if (k < nlocal) v_tally9(thr->cvatom_angle[k],v3);
1047     }
1048   }
1049 }
1050 
1051 /* ----------------------------------------------------------------------
1052    tally energy and virial from 1-3 repulsion of SDK angle into accumulators
1053 ------------------------------------------------------------------------- */
1054 
ev_tally13_thr(Angle * const angle,const int i1,const int i3,const int nlocal,const int newton_bond,const double epair,const double fpair,const double delx,const double dely,const double delz,ThrData * const thr)1055 void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3,
1056                             const int nlocal, const int newton_bond,
1057                             const double epair, const double fpair,
1058                             const double delx, const double dely,
1059                             const double delz, ThrData * const thr)
1060 {
1061 
1062   if (angle->eflag_either) {
1063     const double epairhalf = 0.5 * epair;
1064 
1065     if (angle->eflag_global) {
1066       if (newton_bond || i1 < nlocal)
1067         thr->eng_angle += epairhalf;
1068       if (newton_bond || i3 < nlocal)
1069         thr->eng_angle += epairhalf;
1070     }
1071 
1072     if (angle->eflag_atom) {
1073       if (newton_bond || i1 < nlocal) thr->eatom_angle[i1] += epairhalf;
1074       if (newton_bond || i3 < nlocal) thr->eatom_angle[i3] += epairhalf;
1075     }
1076   }
1077 
1078   if (angle->vflag_either) {
1079     double v[6];
1080     v[0] = delx*delx*fpair;
1081     v[1] = dely*dely*fpair;
1082     v[2] = delz*delz*fpair;
1083     v[3] = delx*dely*fpair;
1084     v[4] = delx*delz*fpair;
1085     v[5] = dely*delz*fpair;
1086 
1087     if (angle->vflag_global) {
1088       double * const va = thr->virial_angle;
1089       if (newton_bond || i1 < nlocal) v_tally(va,0.5,v);
1090       if (newton_bond || i3 < nlocal) v_tally(va,0.5,v);
1091     }
1092 
1093     if (angle->vflag_atom) {
1094       if (newton_bond || i1 < nlocal) {
1095         double * const va = thr->vatom_angle[i1];
1096         v_tally(va,0.5,v);
1097       }
1098       if (newton_bond || i3 < nlocal) {
1099         double * const va = thr->vatom_angle[i3];
1100         v_tally(va,0.5,v);
1101       }
1102     }
1103   }
1104 }
1105 
1106 
1107 /* ----------------------------------------------------------------------
1108    tally energy and virial into global and per-atom accumulators
1109    virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
1110           = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
1111           = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
1112 ------------------------------------------------------------------------- */
1113 
ev_tally_thr(Dihedral * const dihed,const int i1,const int i2,const int i3,const int i4,const int nlocal,const int newton_bond,const double edihedral,const double * const f1,const double * const f3,const double * const f4,const double vb1x,const double vb1y,const double vb1z,const double vb2x,const double vb2y,const double vb2z,const double vb3x,const double vb3y,const double vb3z,ThrData * const thr)1114 void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2,
1115                           const int i3, const int i4, const int nlocal,
1116                           const int newton_bond, const double edihedral,
1117                           const double * const f1, const double * const f3,
1118                           const double * const f4, const double vb1x,
1119                           const double vb1y, const double vb1z, const double vb2x,
1120                           const double vb2y, const double vb2z, const double vb3x,
1121                           const double vb3y, const double vb3z, ThrData * const thr)
1122 {
1123 
1124   if (dihed->eflag_either) {
1125     if (dihed->eflag_global) {
1126       if (newton_bond) {
1127         thr->eng_dihed += edihedral;
1128       } else {
1129         const double edihedralquarter = 0.25*edihedral;
1130         int cnt = 0;
1131         if (i1 < nlocal) ++cnt;
1132         if (i2 < nlocal) ++cnt;
1133         if (i3 < nlocal) ++cnt;
1134         if (i4 < nlocal) ++cnt;
1135         thr->eng_dihed += static_cast<double>(cnt)*edihedralquarter;
1136       }
1137     }
1138     if (dihed->eflag_atom) {
1139       const double edihedralquarter = 0.25*edihedral;
1140       if (newton_bond) {
1141         thr->eatom_dihed[i1] += edihedralquarter;
1142         thr->eatom_dihed[i2] += edihedralquarter;
1143         thr->eatom_dihed[i3] += edihedralquarter;
1144         thr->eatom_dihed[i4] += edihedralquarter;
1145       } else {
1146         if (i1 < nlocal) thr->eatom_dihed[i1] +=  edihedralquarter;
1147         if (i2 < nlocal) thr->eatom_dihed[i2] +=  edihedralquarter;
1148         if (i3 < nlocal) thr->eatom_dihed[i3] +=  edihedralquarter;
1149         if (i4 < nlocal) thr->eatom_dihed[i4] +=  edihedralquarter;
1150       }
1151     }
1152   }
1153 
1154   if (dihed->vflag_either) {
1155     double v[6];
1156     v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
1157     v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
1158     v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
1159     v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
1160     v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
1161     v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
1162 
1163     if (dihed->vflag_global) {
1164       if (newton_bond) {
1165         v_tally(thr->virial_dihed,v);
1166       } else {
1167         int cnt = 0;
1168         if (i1 < nlocal) ++cnt;
1169         if (i2 < nlocal) ++cnt;
1170         if (i3 < nlocal) ++cnt;
1171         if (i4 < nlocal) ++cnt;
1172         v_tally(thr->virial_dihed,0.25*static_cast<double>(cnt),v);
1173       }
1174     }
1175 
1176     v[0] *= 0.25;
1177     v[1] *= 0.25;
1178     v[2] *= 0.25;
1179     v[3] *= 0.25;
1180     v[4] *= 0.25;
1181     v[5] *= 0.25;
1182 
1183     if (dihed->vflag_atom) {
1184       if (newton_bond) {
1185         v_tally(thr->vatom_dihed[i1],v);
1186         v_tally(thr->vatom_dihed[i2],v);
1187         v_tally(thr->vatom_dihed[i3],v);
1188         v_tally(thr->vatom_dihed[i4],v);
1189       } else {
1190         if (i1 < nlocal) v_tally(thr->vatom_dihed[i1],v);
1191         if (i2 < nlocal) v_tally(thr->vatom_dihed[i2],v);
1192         if (i3 < nlocal) v_tally(thr->vatom_dihed[i3],v);
1193         if (i4 < nlocal) v_tally(thr->vatom_dihed[i4],v);
1194       }
1195     }
1196   }
1197 
1198   // per-atom centroid virial
1199   if (dihed->cvflag_atom) {
1200     double f2[3], v1[9], v2[9], v3[9], v4[9];
1201     double a1[3], a2[3], a3[3], a4[3];
1202 
1203     // r0 = (r1+r2+r3+r4)/4
1204     // rij = ri-rj
1205     // total virial = r10*f1 + r20*f2 + r30*f3 + r40*f4
1206     // vb1: r12
1207     // vb2: r32
1208     // vb3: r43
1209 
1210     // a1 = r10 = (3*r12 - 2*r32 -   r43)/4
1211     a1[0] = 0.25*(3*vb1x - 2*vb2x - vb3x);
1212     a1[1] = 0.25*(3*vb1y - 2*vb2y - vb3y);
1213     a1[2] = 0.25*(3*vb1z - 2*vb2z - vb3z);
1214 
1215     // a2 = r20 = ( -r12 - 2*r32 -   r43)/4
1216     a2[0] = 0.25*(-vb1x - 2*vb2x - vb3x);
1217     a2[1] = 0.25*(-vb1y - 2*vb2y - vb3y);
1218     a2[2] = 0.25*(-vb1z - 2*vb2z - vb3z);
1219 
1220     // a3 = r30 = ( -r12 + 2*r32 -   r43)/4
1221     a3[0] = 0.25*(-vb1x + 2*vb2x - vb3x);
1222     a3[1] = 0.25*(-vb1y + 2*vb2y - vb3y);
1223     a3[2] = 0.25*(-vb1z + 2*vb2z - vb3z);
1224 
1225     // a4 = r40 = ( -r12 + 2*r32 + 3*r43)/4
1226     a4[0] = 0.25*(-vb1x + 2*vb2x + 3*vb3x);
1227     a4[1] = 0.25*(-vb1y + 2*vb2y + 3*vb3y);
1228     a4[2] = 0.25*(-vb1z + 2*vb2z + 3*vb3z);
1229 
1230     f2[0] = - f1[0] - f3[0] - f4[0];
1231     f2[1] = - f1[1] - f3[1] - f4[1];
1232     f2[2] = - f1[2] - f3[2] - f4[2];
1233 
1234     v1[0] = a1[0]*f1[0];
1235     v1[1] = a1[1]*f1[1];
1236     v1[2] = a1[2]*f1[2];
1237     v1[3] = a1[0]*f1[1];
1238     v1[4] = a1[0]*f1[2];
1239     v1[5] = a1[1]*f1[2];
1240     v1[6] = a1[1]*f1[0];
1241     v1[7] = a1[2]*f1[0];
1242     v1[8] = a1[2]*f1[1];
1243 
1244     v2[0] = a2[0]*f2[0];
1245     v2[1] = a2[1]*f2[1];
1246     v2[2] = a2[2]*f2[2];
1247     v2[3] = a2[0]*f2[1];
1248     v2[4] = a2[0]*f2[2];
1249     v2[5] = a2[1]*f2[2];
1250     v2[6] = a2[1]*f2[0];
1251     v2[7] = a2[2]*f2[0];
1252     v2[8] = a2[2]*f2[1];
1253 
1254     v3[0] = a3[0]*f3[0];
1255     v3[1] = a3[1]*f3[1];
1256     v3[2] = a3[2]*f3[2];
1257     v3[3] = a3[0]*f3[1];
1258     v3[4] = a3[0]*f3[2];
1259     v3[5] = a3[1]*f3[2];
1260     v3[6] = a3[1]*f3[0];
1261     v3[7] = a3[2]*f3[0];
1262     v3[8] = a3[2]*f3[1];
1263 
1264     v4[0] = a4[0]*f4[0];
1265     v4[1] = a4[1]*f4[1];
1266     v4[2] = a4[2]*f4[2];
1267     v4[3] = a4[0]*f4[1];
1268     v4[4] = a4[0]*f4[2];
1269     v4[5] = a4[1]*f4[2];
1270     v4[6] = a4[1]*f4[0];
1271     v4[7] = a4[2]*f4[0];
1272     v4[8] = a4[2]*f4[1];
1273 
1274     if (newton_bond) {
1275       v_tally9(thr->cvatom_dihed[i1],v1);
1276       v_tally9(thr->cvatom_dihed[i2],v2);
1277       v_tally9(thr->cvatom_dihed[i3],v3);
1278       v_tally9(thr->cvatom_dihed[i4],v4);
1279     } else {
1280       if (i1 < nlocal) v_tally9(thr->cvatom_dihed[i1],v1);
1281       if (i2 < nlocal) v_tally9(thr->cvatom_dihed[i2],v2);
1282       if (i3 < nlocal) v_tally9(thr->cvatom_dihed[i3],v3);
1283       if (i4 < nlocal) v_tally9(thr->cvatom_dihed[i4],v4);
1284     }
1285   }
1286 
1287 }
1288 
1289 /* ----------------------------------------------------------------------
1290    tally energy and virial into global and per-atom accumulators
1291    virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
1292           = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
1293           = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
1294 ------------------------------------------------------------------------- */
1295 
ev_tally_thr(Improper * const imprp,const int i1,const int i2,const int i3,const int i4,const int nlocal,const int newton_bond,const double eimproper,const double * const f1,const double * const f3,const double * const f4,const double vb1x,const double vb1y,const double vb1z,const double vb2x,const double vb2y,const double vb2z,const double vb3x,const double vb3y,const double vb3z,ThrData * const thr)1296 void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2,
1297                           const int i3, const int i4, const int nlocal,
1298                           const int newton_bond, const double eimproper,
1299                           const double * const f1, const double * const f3,
1300                           const double * const f4, const double vb1x,
1301                           const double vb1y, const double vb1z, const double vb2x,
1302                           const double vb2y, const double vb2z, const double vb3x,
1303                           const double vb3y, const double vb3z, ThrData * const thr)
1304 {
1305 
1306   if (imprp->eflag_either) {
1307     if (imprp->eflag_global) {
1308       if (newton_bond) {
1309         thr->eng_imprp += eimproper;
1310       } else {
1311         const double eimproperquarter = 0.25*eimproper;
1312         int cnt = 0;
1313         if (i1 < nlocal) ++cnt;
1314         if (i2 < nlocal) ++cnt;
1315         if (i3 < nlocal) ++cnt;
1316         if (i4 < nlocal) ++cnt;
1317         thr->eng_imprp += static_cast<double>(cnt)*eimproperquarter;
1318       }
1319     }
1320     if (imprp->eflag_atom) {
1321       const double eimproperquarter = 0.25*eimproper;
1322       if (newton_bond) {
1323         thr->eatom_imprp[i1] += eimproperquarter;
1324         thr->eatom_imprp[i2] += eimproperquarter;
1325         thr->eatom_imprp[i3] += eimproperquarter;
1326         thr->eatom_imprp[i4] += eimproperquarter;
1327       } else {
1328         if (i1 < nlocal) thr->eatom_imprp[i1] +=  eimproperquarter;
1329         if (i2 < nlocal) thr->eatom_imprp[i2] +=  eimproperquarter;
1330         if (i3 < nlocal) thr->eatom_imprp[i3] +=  eimproperquarter;
1331         if (i4 < nlocal) thr->eatom_imprp[i4] +=  eimproperquarter;
1332       }
1333     }
1334   }
1335 
1336   if (imprp->vflag_either) {
1337     double v[6];
1338     v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
1339     v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
1340     v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
1341     v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
1342     v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
1343     v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
1344 
1345     if (imprp->vflag_global) {
1346       if (newton_bond) {
1347         v_tally(thr->virial_imprp,v);
1348       } else {
1349         int cnt = 0;
1350         if (i1 < nlocal) ++cnt;
1351         if (i2 < nlocal) ++cnt;
1352         if (i3 < nlocal) ++cnt;
1353         if (i4 < nlocal) ++cnt;
1354         v_tally(thr->virial_imprp,0.25*static_cast<double>(cnt),v);
1355       }
1356     }
1357 
1358     v[0] *= 0.25;
1359     v[1] *= 0.25;
1360     v[2] *= 0.25;
1361     v[3] *= 0.25;
1362     v[4] *= 0.25;
1363     v[5] *= 0.25;
1364 
1365     if (imprp->vflag_atom) {
1366       if (newton_bond) {
1367         v_tally(thr->vatom_imprp[i1],v);
1368         v_tally(thr->vatom_imprp[i2],v);
1369         v_tally(thr->vatom_imprp[i3],v);
1370         v_tally(thr->vatom_imprp[i4],v);
1371       } else {
1372         if (i1 < nlocal) v_tally(thr->vatom_imprp[i1],v);
1373         if (i2 < nlocal) v_tally(thr->vatom_imprp[i2],v);
1374         if (i3 < nlocal) v_tally(thr->vatom_imprp[i3],v);
1375         if (i4 < nlocal) v_tally(thr->vatom_imprp[i4],v);
1376       }
1377     }
1378   }
1379 
1380   // per-atom centroid virial
1381   if (imprp->cvflag_atom) {
1382     double f2[3], v1[9], v2[9], v3[9], v4[9];
1383     double a1[3], a2[3], a3[3], a4[3];
1384 
1385     // r0 = (r1+r2+r3+r4)/4
1386     // rij = ri-rj
1387     // total virial = r10*f1 + r20*f2 + r30*f3 + r40*f4
1388     // vb1: r12
1389     // vb2: r32
1390     // vb3: r43
1391 
1392     // a1 = r10 = (3*r12 - 2*r32 -   r43)/4
1393     a1[0] = 0.25*(3*vb1x - 2*vb2x - vb3x);
1394     a1[1] = 0.25*(3*vb1y - 2*vb2y - vb3y);
1395     a1[2] = 0.25*(3*vb1z - 2*vb2z - vb3z);
1396 
1397     // a2 = r20 = ( -r12 - 2*r32 -   r43)/4
1398     a2[0] = 0.25*(-vb1x - 2*vb2x - vb3x);
1399     a2[1] = 0.25*(-vb1y - 2*vb2y - vb3y);
1400     a2[2] = 0.25*(-vb1z - 2*vb2z - vb3z);
1401 
1402     // a3 = r30 = ( -r12 + 2*r32 -   r43)/4
1403     a3[0] = 0.25*(-vb1x + 2*vb2x - vb3x);
1404     a3[1] = 0.25*(-vb1y + 2*vb2y - vb3y);
1405     a3[2] = 0.25*(-vb1z + 2*vb2z - vb3z);
1406 
1407     // a4 = r40 = ( -r12 + 2*r32 + 3*r43)/4
1408     a4[0] = 0.25*(-vb1x + 2*vb2x + 3*vb3x);
1409     a4[1] = 0.25*(-vb1y + 2*vb2y + 3*vb3y);
1410     a4[2] = 0.25*(-vb1z + 2*vb2z + 3*vb3z);
1411 
1412     f2[0] = - f1[0] - f3[0] - f4[0];
1413     f2[1] = - f1[1] - f3[1] - f4[1];
1414     f2[2] = - f1[2] - f3[2] - f4[2];
1415 
1416     v1[0] = a1[0]*f1[0];
1417     v1[1] = a1[1]*f1[1];
1418     v1[2] = a1[2]*f1[2];
1419     v1[3] = a1[0]*f1[1];
1420     v1[4] = a1[0]*f1[2];
1421     v1[5] = a1[1]*f1[2];
1422     v1[6] = a1[1]*f1[0];
1423     v1[7] = a1[2]*f1[0];
1424     v1[8] = a1[2]*f1[1];
1425 
1426     v2[0] = a2[0]*f2[0];
1427     v2[1] = a2[1]*f2[1];
1428     v2[2] = a2[2]*f2[2];
1429     v2[3] = a2[0]*f2[1];
1430     v2[4] = a2[0]*f2[2];
1431     v2[5] = a2[1]*f2[2];
1432     v2[6] = a2[1]*f2[0];
1433     v2[7] = a2[2]*f2[0];
1434     v2[8] = a2[2]*f2[1];
1435 
1436     v3[0] = a3[0]*f3[0];
1437     v3[1] = a3[1]*f3[1];
1438     v3[2] = a3[2]*f3[2];
1439     v3[3] = a3[0]*f3[1];
1440     v3[4] = a3[0]*f3[2];
1441     v3[5] = a3[1]*f3[2];
1442     v3[6] = a3[1]*f3[0];
1443     v3[7] = a3[2]*f3[0];
1444     v3[8] = a3[2]*f3[1];
1445 
1446     v4[0] = a4[0]*f4[0];
1447     v4[1] = a4[1]*f4[1];
1448     v4[2] = a4[2]*f4[2];
1449     v4[3] = a4[0]*f4[1];
1450     v4[4] = a4[0]*f4[2];
1451     v4[5] = a4[1]*f4[2];
1452     v4[6] = a4[1]*f4[0];
1453     v4[7] = a4[2]*f4[0];
1454     v4[8] = a4[2]*f4[1];
1455 
1456     if (newton_bond) {
1457       v_tally9(thr->cvatom_imprp[i1],v1);
1458       v_tally9(thr->cvatom_imprp[i2],v2);
1459       v_tally9(thr->cvatom_imprp[i3],v3);
1460       v_tally9(thr->cvatom_imprp[i4],v4);
1461     } else {
1462       if (i1 < nlocal) v_tally9(thr->cvatom_imprp[i1],v1);
1463       if (i2 < nlocal) v_tally9(thr->cvatom_imprp[i2],v2);
1464       if (i3 < nlocal) v_tally9(thr->cvatom_imprp[i3],v3);
1465       if (i4 < nlocal) v_tally9(thr->cvatom_imprp[i4],v4);
1466     }
1467   }
1468 
1469 }
1470 
1471 /* ----------------------------------------------------------------------
1472    tally virial into per-atom accumulators
1473    called by AIREBO potential, newton_pair is always on
1474    fpair is magnitude of force on atom I
1475 ------------------------------------------------------------------------- */
1476 
v_tally2_thr(Pair * const pair,const int i,const int j,const double fpair,const double * const drij,ThrData * const thr)1477 void ThrOMP::v_tally2_thr(Pair *const pair, const int i, const int j, const double fpair,
1478                           const double * const drij, ThrData * const thr)
1479 {
1480   double v[6];
1481 
1482   v[0] = drij[0]*drij[0]*fpair;
1483   v[1] = drij[1]*drij[1]*fpair;
1484   v[2] = drij[2]*drij[2]*fpair;
1485   v[3] = drij[0]*drij[1]*fpair;
1486   v[4] = drij[0]*drij[2]*fpair;
1487   v[5] = drij[1]*drij[2]*fpair;
1488   if (pair->vflag_global) v_tally(thr->virial_pair,v);
1489 
1490   if (pair->vflag_atom) {
1491     v[0] *= 0.5;
1492     v[1] *= 0.5;
1493     v[2] *= 0.5;
1494     v[3] *= 0.5;
1495     v[4] *= 0.5;
1496     v[5] *= 0.5;
1497     v_tally(thr->vatom_pair[i],v);
1498     v_tally(thr->vatom_pair[j],v);
1499   }
1500 }
1501 
1502 /* ----------------------------------------------------------------------
1503    tally virial into per-atom accumulators
1504    called by RexaFF potential, newton_pair is always on
1505    fi is magnitude of force on atom i, deli is the direction
1506    note that the other atom (j) is not updated, due to newton on
1507 ------------------------------------------------------------------------- */
1508 
v_tally2_newton_thr(Pair * const pair,const int i,const double * const fi,const double * const deli,ThrData * const thr)1509 void ThrOMP::v_tally2_newton_thr(Pair *const pair, const int i, const double * const fi,
1510                                  const double * const deli, ThrData * const thr)
1511 {
1512   double v[6];
1513 
1514   v[0] = deli[0]*fi[0];
1515   v[1] = deli[1]*fi[1];
1516   v[2] = deli[2]*fi[2];
1517   v[3] = deli[0]*fi[1];
1518   v[4] = deli[0]*fi[2];
1519   v[5] = deli[1]*fi[2];
1520   if (pair->vflag_global) v_tally(thr->virial_pair,v);
1521   if (pair->vflag_atom) v_tally(thr->vatom_pair[i],v);
1522 }
1523 
1524 /* ----------------------------------------------------------------------
1525    tally virial into per-atom accumulators
1526    called by AIREBO and Tersoff potential, newton_pair is always on
1527 ------------------------------------------------------------------------- */
1528 
v_tally3_thr(Pair * const pair,const int i,const int j,const int k,const double * const fi,const double * const fj,const double * const drik,const double * const drjk,ThrData * const thr)1529 void ThrOMP::v_tally3_thr(Pair *const pair, const int i, const int j, const int k,
1530                           const double * const fi, const double * const fj,
1531                           const double * const drik, const double * const drjk,
1532                           ThrData * const thr)
1533 {
1534   double v[6];
1535 
1536   v[0] = (drik[0]*fi[0] + drjk[0]*fj[0]);
1537   v[1] = (drik[1]*fi[1] + drjk[1]*fj[1]);
1538   v[2] = (drik[2]*fi[2] + drjk[2]*fj[2]);
1539   v[3] = (drik[0]*fi[1] + drjk[0]*fj[1]);
1540   v[4] = (drik[0]*fi[2] + drjk[0]*fj[2]);
1541   v[5] = (drik[1]*fi[2] + drjk[1]*fj[2]);
1542   if (pair->vflag_global) v_tally(thr->virial_pair,v);
1543 
1544   if (pair->vflag_atom) {
1545     v[0] *= THIRD;
1546     v[1] *= THIRD;
1547     v[2] *= THIRD;
1548     v[3] *= THIRD;
1549     v[4] *= THIRD;
1550     v[5] *= THIRD;
1551     v_tally(thr->vatom_pair[i],v);
1552     v_tally(thr->vatom_pair[j],v);
1553     v_tally(thr->vatom_pair[k],v);
1554   }
1555 }
1556 
1557 /* ----------------------------------------------------------------------
1558    tally virial into per-atom accumulators
1559    called by AIREBO potential, newton_pair is always on
1560 ------------------------------------------------------------------------- */
1561 
v_tally4_thr(Pair * const pair,const int i,const int j,const int k,const int m,const double * const fi,const double * const fj,const double * const fk,const double * const drim,const double * const drjm,const double * const drkm,ThrData * const thr)1562 void ThrOMP::v_tally4_thr(Pair *const pair, const int i, const int j, const int k, const int m,
1563                           const double * const fi, const double * const fj,
1564                           const double * const fk, const double * const drim,
1565                           const double * const drjm, const double * const drkm,
1566                           ThrData * const thr)
1567 {
1568   double v[6];
1569 
1570   v[0] = (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]);
1571   v[1] = (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]);
1572   v[2] = (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]);
1573   v[3] = (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]);
1574   v[4] = (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
1575   v[5] = (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
1576   if (pair->vflag_global) v_tally(thr->virial_pair,v);
1577 
1578   if (pair->vflag_atom) {
1579     v[0] *= 0.25;
1580     v[1] *= 0.25;
1581     v[2] *= 0.25;
1582     v[3] *= 0.25;
1583     v[4] *= 0.25;
1584     v[5] *= 0.25;
1585     v_tally(thr->vatom_pair[i],v);
1586     v_tally(thr->vatom_pair[j],v);
1587     v_tally(thr->vatom_pair[k],v);
1588     v_tally(thr->vatom_pair[m],v);
1589   }
1590 }
1591 
1592 /* ---------------------------------------------------------------------- */
1593 
memory_usage_thr()1594 double ThrOMP::memory_usage_thr()
1595 {
1596   double bytes=0.0;
1597 
1598   return bytes;
1599 }
1600