1 // clang-format off
2 /* -------------------------------------------------------------------------
3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
4 https://www.lammps.org/, Sandia National Laboratories
5 Steve Plimpton, sjplimp@sandia.gov
6
7 Copyright (2003) Sandia Corporation. Under the terms of Contract
8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
9 certain rights in this software. This software is distributed under
10 the GNU General Public License.
11
12 See the README file in the top-level LAMMPS directory.
13 ------------------------------------------------------------------------- */
14
15 /* ----------------------------------------------------------------------
16 Contributing author: Axel Kohlmeyer (Temple U)
17 OpenMP based threading support for LAMMPS
18 ------------------------------------------------------------------------- */
19
20 #include <cstring>
21
22 #include "atom.h"
23 #include "comm.h"
24 #include "error.h"
25 #include "force.h"
26 #include "modify.h"
27 #include "neighbor.h"
28
29
30 #include "thr_omp.h"
31
32 #include "pair.h"
33 #include "bond.h"
34 #include "angle.h"
35 #include "dihedral.h"
36 #include "improper.h"
37 #include "compute.h"
38
39 #include "math_const.h"
40
41 using namespace LAMMPS_NS;
42 using namespace MathConst;
43
44 /* ---------------------------------------------------------------------- */
45
ThrOMP(LAMMPS * ptr,int style)46 ThrOMP::ThrOMP(LAMMPS *ptr, int style)
47 : lmp(ptr), fix(nullptr), thr_style(style), thr_error(0)
48 {
49 // register fix omp with this class
50 int ifix = lmp->modify->find_fix("package_omp");
51 if (ifix < 0)
52 lmp->error->all(FLERR,"The 'package omp' command is required for /omp styles");
53 fix = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
54 }
55
56 /* ---------------------------------------------------------------------- */
57
~ThrOMP()58 ThrOMP::~ThrOMP()
59 {
60 // nothing to do?
61 }
62
63 /* ----------------------------------------------------------------------
64 Hook up per thread per atom arrays into the tally infrastructure
65 ---------------------------------------------------------------------- */
66
ev_setup_thr(int eflag,int vflag,int nall,double * eatom,double ** vatom,double ** cvatom,ThrData * thr)67 void ThrOMP::ev_setup_thr(int eflag, int vflag, int nall, double *eatom,
68 double **vatom, double **cvatom, ThrData *thr)
69 {
70 const int tid = thr->get_tid();
71 if (tid == 0) thr_error = 0;
72
73 if (thr_style & THR_PAIR) {
74 if (eflag & ENERGY_ATOM) {
75 thr->eatom_pair = eatom + tid*nall;
76 if (nall > 0)
77 memset(&(thr->eatom_pair[0]),0,nall*sizeof(double));
78 }
79 // per-atom virial and per-atom centroid virial are the same for two-body
80 // many-body pair styles not yet implemented
81 if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
82 thr->vatom_pair = vatom + tid*nall;
83 if (nall > 0)
84 memset(&(thr->vatom_pair[0][0]),0,nall*6*sizeof(double));
85 }
86 // check cvatom_pair, because can't access centroidstressflag
87 if ((vflag & VIRIAL_CENTROID) && cvatom) {
88 thr->cvatom_pair = cvatom + tid*nall;
89 if (nall > 0)
90 memset(&(thr->cvatom_pair[0][0]),0,nall*9*sizeof(double));
91 } else {
92 thr->cvatom_pair = nullptr;
93 }
94
95 }
96
97 if (thr_style & THR_BOND) {
98 if (eflag & ENERGY_ATOM) {
99 thr->eatom_bond = eatom + tid*nall;
100 if (nall > 0)
101 memset(&(thr->eatom_bond[0]),0,nall*sizeof(double));
102 }
103 // per-atom virial and per-atom centroid virial are the same for bonds
104 if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
105 thr->vatom_bond = vatom + tid*nall;
106 if (nall > 0)
107 memset(&(thr->vatom_bond[0][0]),0,nall*6*sizeof(double));
108 }
109 }
110
111 if (thr_style & THR_ANGLE) {
112 if (eflag & ENERGY_ATOM) {
113 thr->eatom_angle = eatom + tid*nall;
114 if (nall > 0)
115 memset(&(thr->eatom_angle[0]),0,nall*sizeof(double));
116 }
117 if (vflag & VIRIAL_ATOM) {
118 thr->vatom_angle = vatom + tid*nall;
119 if (nall > 0)
120 memset(&(thr->vatom_angle[0][0]),0,nall*6*sizeof(double));
121 }
122 if (vflag & VIRIAL_CENTROID) {
123 thr->cvatom_angle = cvatom + tid*nall;
124 if (nall > 0)
125 memset(&(thr->cvatom_angle[0][0]),0,nall*9*sizeof(double));
126 }
127 }
128
129 if (thr_style & THR_DIHEDRAL) {
130 if (eflag & ENERGY_ATOM) {
131 thr->eatom_dihed = eatom + tid*nall;
132 if (nall > 0)
133 memset(&(thr->eatom_dihed[0]),0,nall*sizeof(double));
134 }
135 if (vflag & VIRIAL_ATOM) {
136 thr->vatom_dihed = vatom + tid*nall;
137 if (nall > 0)
138 memset(&(thr->vatom_dihed[0][0]),0,nall*6*sizeof(double));
139 }
140 if (vflag & VIRIAL_CENTROID) {
141 thr->cvatom_dihed = cvatom + tid*nall;
142 if (nall > 0)
143 memset(&(thr->cvatom_dihed[0][0]),0,nall*9*sizeof(double));
144 }
145 }
146
147 if (thr_style & THR_IMPROPER) {
148 if (eflag & ENERGY_ATOM) {
149 thr->eatom_imprp = eatom + tid*nall;
150 if (nall > 0)
151 memset(&(thr->eatom_imprp[0]),0,nall*sizeof(double));
152 }
153 if (vflag & VIRIAL_ATOM) {
154 thr->vatom_imprp = vatom + tid*nall;
155 if (nall > 0)
156 memset(&(thr->vatom_imprp[0][0]),0,nall*6*sizeof(double));
157 }
158 if (vflag & VIRIAL_CENTROID) {
159 thr->cvatom_imprp = cvatom + tid*nall;
160 if (nall > 0)
161 memset(&(thr->cvatom_imprp[0][0]),0,nall*9*sizeof(double));
162 }
163 }
164
165 // nothing to do for THR_KSPACE
166 }
167
168 /* ----------------------------------------------------------------------
169 Reduce per thread data into the regular structures
170 Reduction of global properties is serialized with a "critical"
171 directive, so that only one thread at a time will access the
172 global variables. Since we are not synchronized, this should
173 come with little overhead. The reduction of per-atom properties
174 in contrast is parallelized over threads in the same way as forces.
175 ---------------------------------------------------------------------- */
176
reduce_thr(void * style,const int eflag,const int vflag,ThrData * const thr)177 void ThrOMP::reduce_thr(void *style, const int eflag, const int vflag,
178 ThrData *const thr)
179 {
180 const int nlocal = lmp->atom->nlocal;
181 const int nghost = lmp->atom->nghost;
182 const int nall = nlocal + nghost;
183 const int nfirst = lmp->atom->nfirst;
184 const int nthreads = lmp->comm->nthreads;
185 const int evflag = eflag | vflag;
186
187 const int tid = thr->get_tid();
188 double **f = lmp->atom->f;
189 double **x = lmp->atom->x;
190
191 int need_force_reduce = 1;
192
193 if (evflag)
194 sync_threads();
195
196 switch (thr_style) {
197
198 case THR_PAIR: {
199
200 if (lmp->force->pair->vflag_fdotr) {
201
202 // this is a non-hybrid pair style. compute per thread fdotr
203 if (fix->last_pair_hybrid == nullptr) {
204 if (lmp->neighbor->includegroup == 0)
205 thr->virial_fdotr_compute(x, nlocal, nghost, -1);
206 else
207 thr->virial_fdotr_compute(x, nlocal, nghost, nfirst);
208 } else {
209 if (style == fix->last_pair_hybrid) {
210 // pair_style hybrid will compute fdotr for us
211 // but we first need to reduce the forces
212 data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
213 fix->did_reduce();
214 need_force_reduce = 0;
215 }
216 }
217 }
218
219 if (evflag) {
220 Pair * const pair = (Pair *)style;
221
222 #if defined(_OPENMP)
223 #pragma omp critical
224 #endif
225 {
226 if (eflag & ENERGY_GLOBAL) {
227 pair->eng_vdwl += thr->eng_vdwl;
228 pair->eng_coul += thr->eng_coul;
229 thr->eng_vdwl = 0.0;
230 thr->eng_coul = 0.0;
231 }
232 if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR))
233 for (int i=0; i < 6; ++i) {
234 pair->virial[i] += thr->virial_pair[i];
235 thr->virial_pair[i] = 0.0;
236 }
237 }
238
239 if (eflag & ENERGY_ATOM) {
240 data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
241 }
242 // per-atom virial and per-atom centroid virial are the same for two-body
243 // many-body pair styles not yet implemented
244 if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
245 data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
246 }
247 // check cvatom_pair, because can't access centroidstressflag
248 if ((vflag & VIRIAL_CENTROID) && thr->cvatom_pair) {
249 data_reduce_thr(&(pair->cvatom[0][0]), nall, nthreads, 9, tid);
250 }
251 }
252 }
253 break;
254
255 case THR_BOND:
256
257 if (evflag) {
258 Bond * const bond = lmp->force->bond;
259 #if defined(_OPENMP)
260 #pragma omp critical
261 #endif
262 {
263 if (eflag & ENERGY_GLOBAL) {
264 bond->energy += thr->eng_bond;
265 thr->eng_bond = 0.0;
266 }
267
268 if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
269 for (int i=0; i < 6; ++i) {
270 bond->virial[i] += thr->virial_bond[i];
271 thr->virial_bond[i] = 0.0;
272 }
273 }
274 }
275
276 if (eflag & ENERGY_ATOM) {
277 data_reduce_thr(&(bond->eatom[0]), nall, nthreads, 1, tid);
278 }
279 // per-atom virial and per-atom centroid virial are the same for bonds
280 if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
281 data_reduce_thr(&(bond->vatom[0][0]), nall, nthreads, 6, tid);
282 }
283
284 }
285 break;
286
287 case THR_ANGLE:
288
289 if (evflag) {
290 Angle * const angle = lmp->force->angle;
291 #if defined(_OPENMP)
292 #pragma omp critical
293 #endif
294 {
295 if (eflag & ENERGY_GLOBAL) {
296 angle->energy += thr->eng_angle;
297 thr->eng_angle = 0.0;
298 }
299
300 if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
301 for (int i=0; i < 6; ++i) {
302 angle->virial[i] += thr->virial_angle[i];
303 thr->virial_angle[i] = 0.0;
304 }
305 }
306 }
307
308 if (eflag & ENERGY_ATOM) {
309 data_reduce_thr(&(angle->eatom[0]), nall, nthreads, 1, tid);
310 }
311 if (vflag & VIRIAL_ATOM) {
312 data_reduce_thr(&(angle->vatom[0][0]), nall, nthreads, 6, tid);
313 }
314 if (vflag & VIRIAL_CENTROID) {
315 data_reduce_thr(&(angle->cvatom[0][0]), nall, nthreads, 9, tid);
316 }
317
318 }
319 break;
320
321 case THR_DIHEDRAL:
322
323 if (evflag) {
324 Dihedral * const dihedral = lmp->force->dihedral;
325 #if defined(_OPENMP)
326 #pragma omp critical
327 #endif
328 {
329 if (eflag & ENERGY_GLOBAL) {
330 dihedral->energy += thr->eng_dihed;
331 thr->eng_dihed = 0.0;
332 }
333
334 if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
335 for (int i=0; i < 6; ++i) {
336 dihedral->virial[i] += thr->virial_dihed[i];
337 thr->virial_dihed[i] = 0.0;
338 }
339 }
340 }
341
342 if (eflag & ENERGY_ATOM) {
343 data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
344 }
345 if (vflag & VIRIAL_ATOM) {
346 data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
347 }
348 if (vflag & VIRIAL_CENTROID) {
349 data_reduce_thr(&(dihedral->cvatom[0][0]), nall, nthreads, 9, tid);
350 }
351
352 }
353 break;
354
355 case THR_DIHEDRAL|THR_CHARMM: // special case for CHARMM dihedrals
356
357 if (evflag) {
358 Dihedral * const dihedral = lmp->force->dihedral;
359 Pair * const pair = lmp->force->pair;
360 #if defined(_OPENMP)
361 #pragma omp critical
362 #endif
363 {
364 if (eflag & ENERGY_GLOBAL) {
365 dihedral->energy += thr->eng_dihed;
366 pair->eng_vdwl += thr->eng_vdwl;
367 pair->eng_coul += thr->eng_coul;
368 thr->eng_dihed = 0.0;
369 thr->eng_vdwl = 0.0;
370 thr->eng_coul = 0.0;
371 }
372
373 if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
374 for (int i=0; i < 6; ++i) {
375 dihedral->virial[i] += thr->virial_dihed[i];
376 pair->virial[i] += thr->virial_pair[i];
377 thr->virial_dihed[i] = 0.0;
378 thr->virial_pair[i] = 0.0;
379 }
380 }
381 }
382
383 if (eflag & ENERGY_ATOM) {
384 data_reduce_thr(&(dihedral->eatom[0]), nall, nthreads, 1, tid);
385 data_reduce_thr(&(pair->eatom[0]), nall, nthreads, 1, tid);
386 }
387 if (vflag & VIRIAL_ATOM) {
388 data_reduce_thr(&(dihedral->vatom[0][0]), nall, nthreads, 6, tid);
389 }
390 if (vflag & VIRIAL_CENTROID) {
391 data_reduce_thr(&(dihedral->cvatom[0][0]), nall, nthreads, 9, tid);
392 }
393 // per-atom virial and per-atom centroid virial are the same for two-body
394 // many-body pair styles not yet implemented
395 if (vflag & (VIRIAL_ATOM | VIRIAL_CENTROID)) {
396 data_reduce_thr(&(pair->vatom[0][0]), nall, nthreads, 6, tid);
397 }
398 // check cvatom_pair, because can't access centroidstressflag
399 if ((vflag & VIRIAL_CENTROID) && thr->cvatom_pair) {
400 data_reduce_thr(&(pair->cvatom[0][0]), nall, nthreads, 9, tid);
401 }
402 }
403 break;
404
405 case THR_IMPROPER:
406
407 if (evflag) {
408 Improper *improper = lmp->force->improper;
409 #if defined(_OPENMP)
410 #pragma omp critical
411 #endif
412 {
413 if (eflag & ENERGY_GLOBAL) {
414 improper->energy += thr->eng_imprp;
415 thr->eng_imprp = 0.0;
416 }
417
418 if (vflag & (VIRIAL_PAIR | VIRIAL_FDOTR)) {
419 for (int i=0; i < 6; ++i) {
420 improper->virial[i] += thr->virial_imprp[i];
421 thr->virial_imprp[i] = 0.0;
422 }
423 }
424 }
425
426 if (eflag & ENERGY_ATOM) {
427 data_reduce_thr(&(improper->eatom[0]), nall, nthreads, 1, tid);
428 }
429 if (vflag & VIRIAL_ATOM) {
430 data_reduce_thr(&(improper->vatom[0][0]), nall, nthreads, 6, tid);
431 }
432 if (vflag & VIRIAL_CENTROID) {
433 data_reduce_thr(&(improper->cvatom[0][0]), nall, nthreads, 9, tid);
434 }
435
436 }
437 break;
438
439 case THR_KSPACE:
440 // nothing to do. XXX may need to add support for per-atom info
441 break;
442
443 case THR_INTGR:
444 // nothing to do
445 break;
446
447 default:
448 printf("tid:%d unhandled thr_style case %d\n", tid, thr_style);
449 break;
450 }
451
452 if (style == fix->last_omp_style) {
453 if (need_force_reduce) {
454 data_reduce_thr(&(f[0][0]), nall, nthreads, 3, tid);
455 fix->did_reduce();
456 }
457
458 if (lmp->atom->torque)
459 data_reduce_thr(&(lmp->atom->torque[0][0]), nall, nthreads, 3, tid);
460 }
461 thr->timer(Timer::COMM);
462 }
463
464 /* ----------------------------------------------------------------------
465 tally eng_vdwl and eng_coul into per thread global and per-atom accumulators
466 ------------------------------------------------------------------------- */
467
e_tally_thr(Pair * const pair,const int i,const int j,const int nlocal,const int newton_pair,const double evdwl,const double ecoul,ThrData * const thr)468 void ThrOMP::e_tally_thr(Pair * const pair, const int i, const int j,
469 const int nlocal, const int newton_pair,
470 const double evdwl, const double ecoul, ThrData * const thr)
471 {
472 if (pair->eflag_global) {
473 if (newton_pair) {
474 thr->eng_vdwl += evdwl;
475 thr->eng_coul += ecoul;
476 } else {
477 const double evdwlhalf = 0.5*evdwl;
478 const double ecoulhalf = 0.5*ecoul;
479 if (i < nlocal) {
480 thr->eng_vdwl += evdwlhalf;
481 thr->eng_coul += ecoulhalf;
482 }
483 if (j < nlocal) {
484 thr->eng_vdwl += evdwlhalf;
485 thr->eng_coul += ecoulhalf;
486 }
487 }
488 }
489 if (pair->eflag_atom && thr->eatom_pair) {
490 const double epairhalf = 0.5 * (evdwl + ecoul);
491 if (newton_pair || i < nlocal) thr->eatom_pair[i] += epairhalf;
492 if (newton_pair || j < nlocal) thr->eatom_pair[j] += epairhalf;
493 }
494 }
495
496 /* helper functions */
v_tally(double * const vout,const double * const vin)497 static void v_tally(double * const vout, const double * const vin)
498 {
499 vout[0] += vin[0];
500 vout[1] += vin[1];
501 vout[2] += vin[2];
502 vout[3] += vin[3];
503 vout[4] += vin[4];
504 vout[5] += vin[5];
505 }
506
v_tally9(double * const vout,const double * const vin)507 static void v_tally9(double * const vout, const double * const vin)
508 {
509 vout[0] += vin[0];
510 vout[1] += vin[1];
511 vout[2] += vin[2];
512 vout[3] += vin[3];
513 vout[4] += vin[4];
514 vout[5] += vin[5];
515 vout[6] += vin[6];
516 vout[7] += vin[7];
517 vout[8] += vin[8];
518 }
519
v_tally(double * const vout,const double scale,const double * const vin)520 static void v_tally(double * const vout, const double scale, const double * const vin)
521 {
522 vout[0] += scale*vin[0];
523 vout[1] += scale*vin[1];
524 vout[2] += scale*vin[2];
525 vout[3] += scale*vin[3];
526 vout[4] += scale*vin[4];
527 vout[5] += scale*vin[5];
528 }
529
530 /* ----------------------------------------------------------------------
531 tally virial into per thread global and per-atom accumulators
532 ------------------------------------------------------------------------- */
v_tally_thr(Pair * const pair,const int i,const int j,const int nlocal,const int newton_pair,const double * const v,ThrData * const thr)533 void ThrOMP::v_tally_thr(Pair * const pair, const int i, const int j,
534 const int nlocal, const int newton_pair,
535 const double * const v, ThrData * const thr)
536 {
537 if (pair->vflag_global) {
538 double * const va = thr->virial_pair;
539 if (newton_pair) {
540 v_tally(va,v);
541 } else {
542 if (i < nlocal) v_tally(va,0.5,v);
543 if (j < nlocal) v_tally(va,0.5,v);
544 }
545 }
546
547 if (pair->vflag_atom) {
548 if (newton_pair || i < nlocal) {
549 double * const va = thr->vatom_pair[i];
550 v_tally(va,0.5,v);
551 }
552 if (newton_pair || j < nlocal) {
553 double * const va = thr->vatom_pair[j];
554 v_tally(va,0.5,v);
555 }
556 }
557 }
558
559 /* ----------------------------------------------------------------------
560 tally eng_vdwl and virial into per thread global and per-atom accumulators
561 need i < nlocal test since called by bond_quartic and dihedral_charmm
562 ------------------------------------------------------------------------- */
563
ev_tally_thr(Pair * const pair,const int i,const int j,const int nlocal,const int newton_pair,const double evdwl,const double ecoul,const double fpair,const double delx,const double dely,const double delz,ThrData * const thr)564 void ThrOMP::ev_tally_thr(Pair * const pair, const int i, const int j, const int nlocal,
565 const int newton_pair, const double evdwl, const double ecoul,
566 const double fpair, const double delx, const double dely,
567 const double delz, ThrData * const thr)
568 {
569
570 if (pair->eflag_either)
571 e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
572
573 if (pair->vflag_either) {
574 double v[6];
575 v[0] = delx*delx*fpair;
576 v[1] = dely*dely*fpair;
577 v[2] = delz*delz*fpair;
578 v[3] = delx*dely*fpair;
579 v[4] = delx*delz*fpair;
580 v[5] = dely*delz*fpair;
581
582 v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr);
583 }
584
585 if (pair->num_tally_compute > 0) {
586 // ev_tally callbacks are not thread safe and thus have to be protected
587 #if defined(_OPENMP)
588 #pragma omp critical
589 #endif
590 for (int k=0; k < pair->num_tally_compute; ++k) {
591 Compute *c = pair->list_tally_compute[k];
592 c->pair_tally_callback(i, j, nlocal, newton_pair,
593 evdwl, ecoul, fpair, delx, dely, delz);
594 }
595 }
596 }
597
598 /* ----------------------------------------------------------------------
599 tally eng_vdwl and virial into global and per-atom accumulators
600 for virial, have delx,dely,delz and fx,fy,fz
601 ------------------------------------------------------------------------- */
602
ev_tally_xyz_thr(Pair * const pair,const int i,const int j,const int nlocal,const int newton_pair,const double evdwl,const double ecoul,const double fx,const double fy,const double fz,const double delx,const double dely,const double delz,ThrData * const thr)603 void ThrOMP::ev_tally_xyz_thr(Pair * const pair, const int i, const int j,
604 const int nlocal, const int newton_pair,
605 const double evdwl, const double ecoul,
606 const double fx, const double fy, const double fz,
607 const double delx, const double dely, const double delz,
608 ThrData * const thr)
609 {
610
611 if (pair->eflag_either)
612 e_tally_thr(pair, i, j, nlocal, newton_pair, evdwl, ecoul, thr);
613
614 if (pair->vflag_either) {
615 double v[6];
616 v[0] = delx*fx;
617 v[1] = dely*fy;
618 v[2] = delz*fz;
619 v[3] = delx*fy;
620 v[4] = delx*fz;
621 v[5] = dely*fz;
622
623 v_tally_thr(pair, i, j, nlocal, newton_pair, v, thr);
624 }
625 }
626
627
628 /* ----------------------------------------------------------------------
629 tally eng_vdwl and virial into global and per-atom accumulators
630 for virial, have delx,dely,delz and fx,fy,fz
631 called when using full neighbor lists
632 ------------------------------------------------------------------------- */
633
ev_tally_xyz_full_thr(Pair * const pair,const int i,const double evdwl,const double ecoul,const double fx,const double fy,const double fz,const double delx,const double dely,const double delz,ThrData * const thr)634 void ThrOMP::ev_tally_xyz_full_thr(Pair * const pair, const int i,
635 const double evdwl, const double ecoul,
636 const double fx, const double fy,
637 const double fz, const double delx,
638 const double dely, const double delz,
639 ThrData * const thr)
640 {
641
642 if (pair->eflag_either)
643 e_tally_thr(pair,i,i,i+1,0,0.5*evdwl,ecoul,thr);
644
645 if (pair->vflag_either) {
646 double v[6];
647 v[0] = 0.5*delx*fx;
648 v[1] = 0.5*dely*fy;
649 v[2] = 0.5*delz*fz;
650 v[3] = 0.5*delx*fy;
651 v[4] = 0.5*delx*fz;
652 v[5] = 0.5*dely*fz;
653
654 v_tally_thr(pair,i,i,i+1,0,v,thr);
655 }
656 }
657
658 /* ----------------------------------------------------------------------
659 tally eng_vdwl and virial into global and per-atom accumulators
660 called by SW and hbond potentials, newton_pair is always on
661 virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk
662 ------------------------------------------------------------------------- */
663
ev_tally3_thr(Pair * const pair,const int i,const int j,const int k,const double evdwl,const double ecoul,const double * const fj,const double * const fk,const double * const drji,const double * const drki,ThrData * const thr)664 void ThrOMP::ev_tally3_thr(Pair * const pair, const int i, const int j, const int k,
665 const double evdwl, const double ecoul,
666 const double * const fj, const double * const fk,
667 const double * const drji, const double * const drki,
668 ThrData * const thr)
669 {
670 if (pair->eflag_either) {
671 if (pair->eflag_global) {
672 thr->eng_vdwl += evdwl;
673 thr->eng_coul += ecoul;
674 }
675 if (pair->eflag_atom) {
676 const double epairthird = THIRD * (evdwl + ecoul);
677 thr->eatom_pair[i] += epairthird;
678 thr->eatom_pair[j] += epairthird;
679 thr->eatom_pair[k] += epairthird;
680 }
681 }
682
683 if (pair->vflag_either) {
684 double v[6];
685
686 v[0] = drji[0]*fj[0] + drki[0]*fk[0];
687 v[1] = drji[1]*fj[1] + drki[1]*fk[1];
688 v[2] = drji[2]*fj[2] + drki[2]*fk[2];
689 v[3] = drji[0]*fj[1] + drki[0]*fk[1];
690 v[4] = drji[0]*fj[2] + drki[0]*fk[2];
691 v[5] = drji[1]*fj[2] + drki[1]*fk[2];
692
693 if (pair->vflag_global) v_tally(thr->virial_pair,v);
694
695 if (pair->vflag_atom) {
696 v_tally(thr->vatom_pair[i],THIRD,v);
697 v_tally(thr->vatom_pair[j],THIRD,v);
698 v_tally(thr->vatom_pair[k],THIRD,v);
699 }
700 }
701 }
702
703 /* ----------------------------------------------------------------------
704 tally eng_vdwl and virial into global and per-atom accumulators
705 called by AIREBO potential, newton_pair is always on
706 ------------------------------------------------------------------------- */
707
ev_tally4_thr(Pair * const pair,const int i,const int j,const int k,const int m,const double evdwl,const double * const fi,const double * const fj,const double * const fk,const double * const drim,const double * const drjm,const double * const drkm,ThrData * const thr)708 void ThrOMP::ev_tally4_thr(Pair * const pair, const int i, const int j,
709 const int k, const int m, const double evdwl,
710 const double * const fi, const double * const fj,
711 const double * const fk, const double * const drim,
712 const double * const drjm, const double * const drkm,
713 ThrData * const thr)
714 {
715 double v[6];
716
717 if (pair->eflag_either) {
718 if (pair->eflag_global) thr->eng_vdwl += evdwl;
719 if (pair->eflag_atom) {
720 const double epairfourth = 0.25 * evdwl;
721 thr->eatom_pair[i] += epairfourth;
722 thr->eatom_pair[j] += epairfourth;
723 thr->eatom_pair[k] += epairfourth;
724 thr->eatom_pair[m] += epairfourth;
725 }
726 }
727
728 if (pair->vflag_either) {
729 v[0] = (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]);
730 v[1] = (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]);
731 v[2] = (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]);
732 v[3] = (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]);
733 v[4] = (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
734 v[5] = (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
735 if (pair->vflag_global) v_tally(thr->virial_pair,v);
736
737 if (pair->vflag_atom) {
738 v[0] *= 0.25;
739 v[1] *= 0.25;
740 v[2] *= 0.25;
741 v[3] *= 0.25;
742 v[4] *= 0.25;
743 v[5] *= 0.25;
744 v_tally(thr->vatom_pair[i],v);
745 v_tally(thr->vatom_pair[j],v);
746 v_tally(thr->vatom_pair[k],v);
747 v_tally(thr->vatom_pair[m],v);
748 }
749 }
750 }
751
752 /* ----------------------------------------------------------------------
753 tally ecoul and virial into each of n atoms in list
754 called by TIP4P potential, newton_pair is always on
755 changes v values by dividing by n
756 ------------------------------------------------------------------------- */
757
ev_tally_list_thr(Pair * const pair,const int key,const int * const list,const double * const v,const double ecoul,const double alpha,ThrData * const thr)758 void ThrOMP::ev_tally_list_thr(Pair * const pair, const int key,
759 const int * const list, const double * const v,
760 const double ecoul, const double alpha,
761 ThrData * const thr)
762 {
763 int i;
764 if (pair->eflag_either) {
765 if (pair->eflag_global) thr->eng_coul += ecoul;
766 if (pair->eflag_atom) {
767 if (key == 0) {
768 thr->eatom_pair[list[0]] += 0.5*ecoul;
769 thr->eatom_pair[list[1]] += 0.5*ecoul;
770 } else if (key == 1) {
771 thr->eatom_pair[list[0]] += 0.5*ecoul*(1-alpha);
772 thr->eatom_pair[list[1]] += 0.25*ecoul*alpha;
773 thr->eatom_pair[list[2]] += 0.25*ecoul*alpha;
774 thr->eatom_pair[list[3]] += 0.5*ecoul;
775 } else if (key == 2) {
776 thr->eatom_pair[list[0]] += 0.5*ecoul;
777 thr->eatom_pair[list[1]] += 0.5*ecoul*(1-alpha);
778 thr->eatom_pair[list[2]] += 0.25*ecoul*alpha;
779 thr->eatom_pair[list[3]] += 0.25*ecoul*alpha;
780 } else {
781 thr->eatom_pair[list[0]] += 0.5*ecoul*(1-alpha);
782 thr->eatom_pair[list[1]] += 0.25*ecoul*alpha;
783 thr->eatom_pair[list[2]] += 0.25*ecoul*alpha;
784 thr->eatom_pair[list[3]] += 0.5*ecoul*(1-alpha);
785 thr->eatom_pair[list[4]] += 0.25*ecoul*alpha;
786 thr->eatom_pair[list[5]] += 0.25*ecoul*alpha;
787 }
788 }
789 }
790
791 if (pair->vflag_either) {
792 if (pair->vflag_global)
793 v_tally(thr->virial_pair,v);
794
795 if (pair->vflag_atom) {
796 if (key == 0) {
797 for (i = 0; i <= 5; i++) {
798 thr->vatom_pair[list[0]][i] += 0.5*v[i];
799 thr->vatom_pair[list[1]][i] += 0.5*v[i];
800 }
801 } else if (key == 1) {
802 for (i = 0; i <= 5; i++) {
803 thr->vatom_pair[list[0]][i] += 0.5*v[i]*(1-alpha);
804 thr->vatom_pair[list[1]][i] += 0.25*v[i]*alpha;
805 thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha;
806 thr->vatom_pair[list[3]][i] += 0.5*v[i];
807 }
808 } else if (key == 2) {
809 for (i = 0; i <= 5; i++) {
810 thr->vatom_pair[list[0]][i] += 0.5*v[i];
811 thr->vatom_pair[list[1]][i] += 0.5*v[i]*(1-alpha);
812 thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha;
813 thr->vatom_pair[list[3]][i] += 0.25*v[i]*alpha;
814 }
815 } else {
816 for (i = 0; i <= 5; i++) {
817 thr->vatom_pair[list[0]][i] += 0.5*v[i]*(1-alpha);
818 thr->vatom_pair[list[1]][i] += 0.25*v[i]*alpha;
819 thr->vatom_pair[list[2]][i] += 0.25*v[i]*alpha;
820 thr->vatom_pair[list[3]][i] += 0.5*v[i]*(1-alpha);
821 thr->vatom_pair[list[4]][i] += 0.25*v[i]*alpha;
822 thr->vatom_pair[list[5]][i] += 0.25*v[i]*alpha;
823 }
824 }
825 }
826 }
827 }
828
829 /* ----------------------------------------------------------------------
830 tally energy and virial into global and per-atom accumulators
831 ------------------------------------------------------------------------- */
832
ev_tally_thr(Bond * const bond,const int i,const int j,const int nlocal,const int newton_bond,const double ebond,const double fbond,const double delx,const double dely,const double delz,ThrData * const thr)833 void ThrOMP::ev_tally_thr(Bond * const bond, const int i, const int j, const int nlocal,
834 const int newton_bond, const double ebond, const double fbond,
835 const double delx, const double dely, const double delz,
836 ThrData * const thr)
837 {
838 if (bond->eflag_either) {
839 const double ebondhalf = 0.5*ebond;
840 if (newton_bond) {
841 if (bond->eflag_global)
842 thr->eng_bond += ebond;
843 if (bond->eflag_atom) {
844 thr->eatom_bond[i] += ebondhalf;
845 thr->eatom_bond[j] += ebondhalf;
846 }
847 } else {
848 if (bond->eflag_global) {
849 if (i < nlocal) thr->eng_bond += ebondhalf;
850 if (j < nlocal) thr->eng_bond += ebondhalf;
851 }
852 if (bond->eflag_atom) {
853 if (i < nlocal) thr->eatom_bond[i] += ebondhalf;
854 if (j < nlocal) thr->eatom_bond[j] += ebondhalf;
855 }
856 }
857 }
858
859 if (bond->vflag_either) {
860 double v[6];
861
862 v[0] = delx*delx*fbond;
863 v[1] = dely*dely*fbond;
864 v[2] = delz*delz*fbond;
865 v[3] = delx*dely*fbond;
866 v[4] = delx*delz*fbond;
867 v[5] = dely*delz*fbond;
868
869 if (bond->vflag_global) {
870 if (newton_bond)
871 v_tally(thr->virial_bond,v);
872 else {
873 if (i < nlocal)
874 v_tally(thr->virial_bond,0.5,v);
875 if (j < nlocal)
876 v_tally(thr->virial_bond,0.5,v);
877 }
878 }
879
880 if (bond->vflag_atom) {
881 v[0] *= 0.5;
882 v[1] *= 0.5;
883 v[2] *= 0.5;
884 v[3] *= 0.5;
885 v[4] *= 0.5;
886 v[5] *= 0.5;
887
888 if (newton_bond) {
889 v_tally(thr->vatom_bond[i],v);
890 v_tally(thr->vatom_bond[j],v);
891 } else {
892 if (i < nlocal)
893 v_tally(thr->vatom_bond[i],v);
894 if (j < nlocal)
895 v_tally(thr->vatom_bond[j],v);
896 }
897 }
898 }
899 }
900
901 /* ----------------------------------------------------------------------
902 tally energy and virial into global and per-atom accumulators
903 virial = r1F1 + r2F2 + r3F3 = (r1-r2) F1 + (r3-r2) F3 = del1*f1 + del2*f3
904 ------------------------------------------------------------------------- */
905
ev_tally_thr(Angle * const angle,const int i,const int j,const int k,const int nlocal,const int newton_bond,const double eangle,const double * const f1,const double * const f3,const double delx1,const double dely1,const double delz1,const double delx2,const double dely2,const double delz2,ThrData * const thr)906 void ThrOMP::ev_tally_thr(Angle * const angle, const int i, const int j, const int k,
907 const int nlocal, const int newton_bond, const double eangle,
908 const double * const f1, const double * const f3,
909 const double delx1, const double dely1, const double delz1,
910 const double delx2, const double dely2, const double delz2,
911 ThrData * const thr)
912 {
913 if (angle->eflag_either) {
914 const double eanglethird = THIRD*eangle;
915 if (newton_bond) {
916 if (angle->eflag_global)
917 thr->eng_angle += eangle;
918 if (angle->eflag_atom) {
919 thr->eatom_angle[i] += eanglethird;
920 thr->eatom_angle[j] += eanglethird;
921 thr->eatom_angle[k] += eanglethird;
922 }
923 } else {
924 if (angle->eflag_global) {
925 if (i < nlocal) thr->eng_angle += eanglethird;
926 if (j < nlocal) thr->eng_angle += eanglethird;
927 if (k < nlocal) thr->eng_angle += eanglethird;
928 }
929 if (angle->eflag_atom) {
930 if (i < nlocal) thr->eatom_angle[i] += eanglethird;
931 if (j < nlocal) thr->eatom_angle[j] += eanglethird;
932 if (k < nlocal) thr->eatom_angle[k] += eanglethird;
933 }
934 }
935 }
936
937 if (angle->vflag_either) {
938 double v[6];
939
940 v[0] = delx1*f1[0] + delx2*f3[0];
941 v[1] = dely1*f1[1] + dely2*f3[1];
942 v[2] = delz1*f1[2] + delz2*f3[2];
943 v[3] = delx1*f1[1] + delx2*f3[1];
944 v[4] = delx1*f1[2] + delx2*f3[2];
945 v[5] = dely1*f1[2] + dely2*f3[2];
946
947 if (angle->vflag_global) {
948 if (newton_bond) {
949 v_tally(thr->virial_angle,v);
950 } else {
951 int cnt = 0;
952 if (i < nlocal) ++cnt;
953 if (j < nlocal) ++cnt;
954 if (k < nlocal) ++cnt;
955 v_tally(thr->virial_angle,cnt*THIRD,v);
956 }
957 }
958
959 if (angle->vflag_atom) {
960 v[0] *= THIRD;
961 v[1] *= THIRD;
962 v[2] *= THIRD;
963 v[3] *= THIRD;
964 v[4] *= THIRD;
965 v[5] *= THIRD;
966
967 if (newton_bond) {
968 v_tally(thr->vatom_angle[i],v);
969 v_tally(thr->vatom_angle[j],v);
970 v_tally(thr->vatom_angle[k],v);
971 } else {
972 if (i < nlocal) v_tally(thr->vatom_angle[i],v);
973 if (j < nlocal) v_tally(thr->vatom_angle[j],v);
974 if (k < nlocal) v_tally(thr->vatom_angle[k],v);
975 }
976 }
977 }
978
979 // per-atom centroid virial
980 if (angle->cvflag_atom) {
981 double f2[3], v1[9], v2[9], v3[9];
982 double a1[3], a2[3], a3[3];
983
984 // r0 = (r1+r2+r3)/3
985 // rij = ri-rj
986 // total virial = r10*f1 + r20*f2 + r30*f3
987 // del1: r12
988 // del2: r32
989
990 // a1 = r10 = (2*r12 - r32)/3
991 a1[0] = THIRD*(2*delx1-delx2);
992 a1[1] = THIRD*(2*dely1-dely2);
993 a1[2] = THIRD*(2*delz1-delz2);
994
995 // a2 = r20 = ( -r12 - r32)/3
996 a2[0] = THIRD*(-delx1-delx2);
997 a2[1] = THIRD*(-dely1-dely2);
998 a2[2] = THIRD*(-delz1-delz2);
999
1000 // a3 = r30 = ( -r12 + 2*r32)/3
1001 a3[0] = THIRD*(-delx1+2*delx2);
1002 a3[1] = THIRD*(-dely1+2*dely2);
1003 a3[2] = THIRD*(-delz1+2*delz2);
1004
1005 f2[0] = - f1[0] - f3[0];
1006 f2[1] = - f1[1] - f3[1];
1007 f2[2] = - f1[2] - f3[2];
1008
1009 v1[0] = a1[0]*f1[0];
1010 v1[1] = a1[1]*f1[1];
1011 v1[2] = a1[2]*f1[2];
1012 v1[3] = a1[0]*f1[1];
1013 v1[4] = a1[0]*f1[2];
1014 v1[5] = a1[1]*f1[2];
1015 v1[6] = a1[1]*f1[0];
1016 v1[7] = a1[2]*f1[0];
1017 v1[8] = a1[2]*f1[1];
1018
1019 v2[0] = a2[0]*f2[0];
1020 v2[1] = a2[1]*f2[1];
1021 v2[2] = a2[2]*f2[2];
1022 v2[3] = a2[0]*f2[1];
1023 v2[4] = a2[0]*f2[2];
1024 v2[5] = a2[1]*f2[2];
1025 v2[6] = a2[1]*f2[0];
1026 v2[7] = a2[2]*f2[0];
1027 v2[8] = a2[2]*f2[1];
1028
1029 v3[0] = a3[0]*f3[0];
1030 v3[1] = a3[1]*f3[1];
1031 v3[2] = a3[2]*f3[2];
1032 v3[3] = a3[0]*f3[1];
1033 v3[4] = a3[0]*f3[2];
1034 v3[5] = a3[1]*f3[2];
1035 v3[6] = a3[1]*f3[0];
1036 v3[7] = a3[2]*f3[0];
1037 v3[8] = a3[2]*f3[1];
1038
1039 if (newton_bond) {
1040 v_tally9(thr->cvatom_angle[i],v1);
1041 v_tally9(thr->cvatom_angle[j],v2);
1042 v_tally9(thr->cvatom_angle[k],v3);
1043 } else {
1044 if (i < nlocal) v_tally9(thr->cvatom_angle[i],v1);
1045 if (j < nlocal) v_tally9(thr->cvatom_angle[j],v2);
1046 if (k < nlocal) v_tally9(thr->cvatom_angle[k],v3);
1047 }
1048 }
1049 }
1050
1051 /* ----------------------------------------------------------------------
1052 tally energy and virial from 1-3 repulsion of SDK angle into accumulators
1053 ------------------------------------------------------------------------- */
1054
ev_tally13_thr(Angle * const angle,const int i1,const int i3,const int nlocal,const int newton_bond,const double epair,const double fpair,const double delx,const double dely,const double delz,ThrData * const thr)1055 void ThrOMP::ev_tally13_thr(Angle * const angle, const int i1, const int i3,
1056 const int nlocal, const int newton_bond,
1057 const double epair, const double fpair,
1058 const double delx, const double dely,
1059 const double delz, ThrData * const thr)
1060 {
1061
1062 if (angle->eflag_either) {
1063 const double epairhalf = 0.5 * epair;
1064
1065 if (angle->eflag_global) {
1066 if (newton_bond || i1 < nlocal)
1067 thr->eng_angle += epairhalf;
1068 if (newton_bond || i3 < nlocal)
1069 thr->eng_angle += epairhalf;
1070 }
1071
1072 if (angle->eflag_atom) {
1073 if (newton_bond || i1 < nlocal) thr->eatom_angle[i1] += epairhalf;
1074 if (newton_bond || i3 < nlocal) thr->eatom_angle[i3] += epairhalf;
1075 }
1076 }
1077
1078 if (angle->vflag_either) {
1079 double v[6];
1080 v[0] = delx*delx*fpair;
1081 v[1] = dely*dely*fpair;
1082 v[2] = delz*delz*fpair;
1083 v[3] = delx*dely*fpair;
1084 v[4] = delx*delz*fpair;
1085 v[5] = dely*delz*fpair;
1086
1087 if (angle->vflag_global) {
1088 double * const va = thr->virial_angle;
1089 if (newton_bond || i1 < nlocal) v_tally(va,0.5,v);
1090 if (newton_bond || i3 < nlocal) v_tally(va,0.5,v);
1091 }
1092
1093 if (angle->vflag_atom) {
1094 if (newton_bond || i1 < nlocal) {
1095 double * const va = thr->vatom_angle[i1];
1096 v_tally(va,0.5,v);
1097 }
1098 if (newton_bond || i3 < nlocal) {
1099 double * const va = thr->vatom_angle[i3];
1100 v_tally(va,0.5,v);
1101 }
1102 }
1103 }
1104 }
1105
1106
1107 /* ----------------------------------------------------------------------
1108 tally energy and virial into global and per-atom accumulators
1109 virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
1110 = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
1111 = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
1112 ------------------------------------------------------------------------- */
1113
ev_tally_thr(Dihedral * const dihed,const int i1,const int i2,const int i3,const int i4,const int nlocal,const int newton_bond,const double edihedral,const double * const f1,const double * const f3,const double * const f4,const double vb1x,const double vb1y,const double vb1z,const double vb2x,const double vb2y,const double vb2z,const double vb3x,const double vb3y,const double vb3z,ThrData * const thr)1114 void ThrOMP::ev_tally_thr(Dihedral * const dihed, const int i1, const int i2,
1115 const int i3, const int i4, const int nlocal,
1116 const int newton_bond, const double edihedral,
1117 const double * const f1, const double * const f3,
1118 const double * const f4, const double vb1x,
1119 const double vb1y, const double vb1z, const double vb2x,
1120 const double vb2y, const double vb2z, const double vb3x,
1121 const double vb3y, const double vb3z, ThrData * const thr)
1122 {
1123
1124 if (dihed->eflag_either) {
1125 if (dihed->eflag_global) {
1126 if (newton_bond) {
1127 thr->eng_dihed += edihedral;
1128 } else {
1129 const double edihedralquarter = 0.25*edihedral;
1130 int cnt = 0;
1131 if (i1 < nlocal) ++cnt;
1132 if (i2 < nlocal) ++cnt;
1133 if (i3 < nlocal) ++cnt;
1134 if (i4 < nlocal) ++cnt;
1135 thr->eng_dihed += static_cast<double>(cnt)*edihedralquarter;
1136 }
1137 }
1138 if (dihed->eflag_atom) {
1139 const double edihedralquarter = 0.25*edihedral;
1140 if (newton_bond) {
1141 thr->eatom_dihed[i1] += edihedralquarter;
1142 thr->eatom_dihed[i2] += edihedralquarter;
1143 thr->eatom_dihed[i3] += edihedralquarter;
1144 thr->eatom_dihed[i4] += edihedralquarter;
1145 } else {
1146 if (i1 < nlocal) thr->eatom_dihed[i1] += edihedralquarter;
1147 if (i2 < nlocal) thr->eatom_dihed[i2] += edihedralquarter;
1148 if (i3 < nlocal) thr->eatom_dihed[i3] += edihedralquarter;
1149 if (i4 < nlocal) thr->eatom_dihed[i4] += edihedralquarter;
1150 }
1151 }
1152 }
1153
1154 if (dihed->vflag_either) {
1155 double v[6];
1156 v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
1157 v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
1158 v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
1159 v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
1160 v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
1161 v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
1162
1163 if (dihed->vflag_global) {
1164 if (newton_bond) {
1165 v_tally(thr->virial_dihed,v);
1166 } else {
1167 int cnt = 0;
1168 if (i1 < nlocal) ++cnt;
1169 if (i2 < nlocal) ++cnt;
1170 if (i3 < nlocal) ++cnt;
1171 if (i4 < nlocal) ++cnt;
1172 v_tally(thr->virial_dihed,0.25*static_cast<double>(cnt),v);
1173 }
1174 }
1175
1176 v[0] *= 0.25;
1177 v[1] *= 0.25;
1178 v[2] *= 0.25;
1179 v[3] *= 0.25;
1180 v[4] *= 0.25;
1181 v[5] *= 0.25;
1182
1183 if (dihed->vflag_atom) {
1184 if (newton_bond) {
1185 v_tally(thr->vatom_dihed[i1],v);
1186 v_tally(thr->vatom_dihed[i2],v);
1187 v_tally(thr->vatom_dihed[i3],v);
1188 v_tally(thr->vatom_dihed[i4],v);
1189 } else {
1190 if (i1 < nlocal) v_tally(thr->vatom_dihed[i1],v);
1191 if (i2 < nlocal) v_tally(thr->vatom_dihed[i2],v);
1192 if (i3 < nlocal) v_tally(thr->vatom_dihed[i3],v);
1193 if (i4 < nlocal) v_tally(thr->vatom_dihed[i4],v);
1194 }
1195 }
1196 }
1197
1198 // per-atom centroid virial
1199 if (dihed->cvflag_atom) {
1200 double f2[3], v1[9], v2[9], v3[9], v4[9];
1201 double a1[3], a2[3], a3[3], a4[3];
1202
1203 // r0 = (r1+r2+r3+r4)/4
1204 // rij = ri-rj
1205 // total virial = r10*f1 + r20*f2 + r30*f3 + r40*f4
1206 // vb1: r12
1207 // vb2: r32
1208 // vb3: r43
1209
1210 // a1 = r10 = (3*r12 - 2*r32 - r43)/4
1211 a1[0] = 0.25*(3*vb1x - 2*vb2x - vb3x);
1212 a1[1] = 0.25*(3*vb1y - 2*vb2y - vb3y);
1213 a1[2] = 0.25*(3*vb1z - 2*vb2z - vb3z);
1214
1215 // a2 = r20 = ( -r12 - 2*r32 - r43)/4
1216 a2[0] = 0.25*(-vb1x - 2*vb2x - vb3x);
1217 a2[1] = 0.25*(-vb1y - 2*vb2y - vb3y);
1218 a2[2] = 0.25*(-vb1z - 2*vb2z - vb3z);
1219
1220 // a3 = r30 = ( -r12 + 2*r32 - r43)/4
1221 a3[0] = 0.25*(-vb1x + 2*vb2x - vb3x);
1222 a3[1] = 0.25*(-vb1y + 2*vb2y - vb3y);
1223 a3[2] = 0.25*(-vb1z + 2*vb2z - vb3z);
1224
1225 // a4 = r40 = ( -r12 + 2*r32 + 3*r43)/4
1226 a4[0] = 0.25*(-vb1x + 2*vb2x + 3*vb3x);
1227 a4[1] = 0.25*(-vb1y + 2*vb2y + 3*vb3y);
1228 a4[2] = 0.25*(-vb1z + 2*vb2z + 3*vb3z);
1229
1230 f2[0] = - f1[0] - f3[0] - f4[0];
1231 f2[1] = - f1[1] - f3[1] - f4[1];
1232 f2[2] = - f1[2] - f3[2] - f4[2];
1233
1234 v1[0] = a1[0]*f1[0];
1235 v1[1] = a1[1]*f1[1];
1236 v1[2] = a1[2]*f1[2];
1237 v1[3] = a1[0]*f1[1];
1238 v1[4] = a1[0]*f1[2];
1239 v1[5] = a1[1]*f1[2];
1240 v1[6] = a1[1]*f1[0];
1241 v1[7] = a1[2]*f1[0];
1242 v1[8] = a1[2]*f1[1];
1243
1244 v2[0] = a2[0]*f2[0];
1245 v2[1] = a2[1]*f2[1];
1246 v2[2] = a2[2]*f2[2];
1247 v2[3] = a2[0]*f2[1];
1248 v2[4] = a2[0]*f2[2];
1249 v2[5] = a2[1]*f2[2];
1250 v2[6] = a2[1]*f2[0];
1251 v2[7] = a2[2]*f2[0];
1252 v2[8] = a2[2]*f2[1];
1253
1254 v3[0] = a3[0]*f3[0];
1255 v3[1] = a3[1]*f3[1];
1256 v3[2] = a3[2]*f3[2];
1257 v3[3] = a3[0]*f3[1];
1258 v3[4] = a3[0]*f3[2];
1259 v3[5] = a3[1]*f3[2];
1260 v3[6] = a3[1]*f3[0];
1261 v3[7] = a3[2]*f3[0];
1262 v3[8] = a3[2]*f3[1];
1263
1264 v4[0] = a4[0]*f4[0];
1265 v4[1] = a4[1]*f4[1];
1266 v4[2] = a4[2]*f4[2];
1267 v4[3] = a4[0]*f4[1];
1268 v4[4] = a4[0]*f4[2];
1269 v4[5] = a4[1]*f4[2];
1270 v4[6] = a4[1]*f4[0];
1271 v4[7] = a4[2]*f4[0];
1272 v4[8] = a4[2]*f4[1];
1273
1274 if (newton_bond) {
1275 v_tally9(thr->cvatom_dihed[i1],v1);
1276 v_tally9(thr->cvatom_dihed[i2],v2);
1277 v_tally9(thr->cvatom_dihed[i3],v3);
1278 v_tally9(thr->cvatom_dihed[i4],v4);
1279 } else {
1280 if (i1 < nlocal) v_tally9(thr->cvatom_dihed[i1],v1);
1281 if (i2 < nlocal) v_tally9(thr->cvatom_dihed[i2],v2);
1282 if (i3 < nlocal) v_tally9(thr->cvatom_dihed[i3],v3);
1283 if (i4 < nlocal) v_tally9(thr->cvatom_dihed[i4],v4);
1284 }
1285 }
1286
1287 }
1288
1289 /* ----------------------------------------------------------------------
1290 tally energy and virial into global and per-atom accumulators
1291 virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
1292 = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
1293 = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
1294 ------------------------------------------------------------------------- */
1295
ev_tally_thr(Improper * const imprp,const int i1,const int i2,const int i3,const int i4,const int nlocal,const int newton_bond,const double eimproper,const double * const f1,const double * const f3,const double * const f4,const double vb1x,const double vb1y,const double vb1z,const double vb2x,const double vb2y,const double vb2z,const double vb3x,const double vb3y,const double vb3z,ThrData * const thr)1296 void ThrOMP::ev_tally_thr(Improper * const imprp, const int i1, const int i2,
1297 const int i3, const int i4, const int nlocal,
1298 const int newton_bond, const double eimproper,
1299 const double * const f1, const double * const f3,
1300 const double * const f4, const double vb1x,
1301 const double vb1y, const double vb1z, const double vb2x,
1302 const double vb2y, const double vb2z, const double vb3x,
1303 const double vb3y, const double vb3z, ThrData * const thr)
1304 {
1305
1306 if (imprp->eflag_either) {
1307 if (imprp->eflag_global) {
1308 if (newton_bond) {
1309 thr->eng_imprp += eimproper;
1310 } else {
1311 const double eimproperquarter = 0.25*eimproper;
1312 int cnt = 0;
1313 if (i1 < nlocal) ++cnt;
1314 if (i2 < nlocal) ++cnt;
1315 if (i3 < nlocal) ++cnt;
1316 if (i4 < nlocal) ++cnt;
1317 thr->eng_imprp += static_cast<double>(cnt)*eimproperquarter;
1318 }
1319 }
1320 if (imprp->eflag_atom) {
1321 const double eimproperquarter = 0.25*eimproper;
1322 if (newton_bond) {
1323 thr->eatom_imprp[i1] += eimproperquarter;
1324 thr->eatom_imprp[i2] += eimproperquarter;
1325 thr->eatom_imprp[i3] += eimproperquarter;
1326 thr->eatom_imprp[i4] += eimproperquarter;
1327 } else {
1328 if (i1 < nlocal) thr->eatom_imprp[i1] += eimproperquarter;
1329 if (i2 < nlocal) thr->eatom_imprp[i2] += eimproperquarter;
1330 if (i3 < nlocal) thr->eatom_imprp[i3] += eimproperquarter;
1331 if (i4 < nlocal) thr->eatom_imprp[i4] += eimproperquarter;
1332 }
1333 }
1334 }
1335
1336 if (imprp->vflag_either) {
1337 double v[6];
1338 v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
1339 v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
1340 v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
1341 v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
1342 v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
1343 v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
1344
1345 if (imprp->vflag_global) {
1346 if (newton_bond) {
1347 v_tally(thr->virial_imprp,v);
1348 } else {
1349 int cnt = 0;
1350 if (i1 < nlocal) ++cnt;
1351 if (i2 < nlocal) ++cnt;
1352 if (i3 < nlocal) ++cnt;
1353 if (i4 < nlocal) ++cnt;
1354 v_tally(thr->virial_imprp,0.25*static_cast<double>(cnt),v);
1355 }
1356 }
1357
1358 v[0] *= 0.25;
1359 v[1] *= 0.25;
1360 v[2] *= 0.25;
1361 v[3] *= 0.25;
1362 v[4] *= 0.25;
1363 v[5] *= 0.25;
1364
1365 if (imprp->vflag_atom) {
1366 if (newton_bond) {
1367 v_tally(thr->vatom_imprp[i1],v);
1368 v_tally(thr->vatom_imprp[i2],v);
1369 v_tally(thr->vatom_imprp[i3],v);
1370 v_tally(thr->vatom_imprp[i4],v);
1371 } else {
1372 if (i1 < nlocal) v_tally(thr->vatom_imprp[i1],v);
1373 if (i2 < nlocal) v_tally(thr->vatom_imprp[i2],v);
1374 if (i3 < nlocal) v_tally(thr->vatom_imprp[i3],v);
1375 if (i4 < nlocal) v_tally(thr->vatom_imprp[i4],v);
1376 }
1377 }
1378 }
1379
1380 // per-atom centroid virial
1381 if (imprp->cvflag_atom) {
1382 double f2[3], v1[9], v2[9], v3[9], v4[9];
1383 double a1[3], a2[3], a3[3], a4[3];
1384
1385 // r0 = (r1+r2+r3+r4)/4
1386 // rij = ri-rj
1387 // total virial = r10*f1 + r20*f2 + r30*f3 + r40*f4
1388 // vb1: r12
1389 // vb2: r32
1390 // vb3: r43
1391
1392 // a1 = r10 = (3*r12 - 2*r32 - r43)/4
1393 a1[0] = 0.25*(3*vb1x - 2*vb2x - vb3x);
1394 a1[1] = 0.25*(3*vb1y - 2*vb2y - vb3y);
1395 a1[2] = 0.25*(3*vb1z - 2*vb2z - vb3z);
1396
1397 // a2 = r20 = ( -r12 - 2*r32 - r43)/4
1398 a2[0] = 0.25*(-vb1x - 2*vb2x - vb3x);
1399 a2[1] = 0.25*(-vb1y - 2*vb2y - vb3y);
1400 a2[2] = 0.25*(-vb1z - 2*vb2z - vb3z);
1401
1402 // a3 = r30 = ( -r12 + 2*r32 - r43)/4
1403 a3[0] = 0.25*(-vb1x + 2*vb2x - vb3x);
1404 a3[1] = 0.25*(-vb1y + 2*vb2y - vb3y);
1405 a3[2] = 0.25*(-vb1z + 2*vb2z - vb3z);
1406
1407 // a4 = r40 = ( -r12 + 2*r32 + 3*r43)/4
1408 a4[0] = 0.25*(-vb1x + 2*vb2x + 3*vb3x);
1409 a4[1] = 0.25*(-vb1y + 2*vb2y + 3*vb3y);
1410 a4[2] = 0.25*(-vb1z + 2*vb2z + 3*vb3z);
1411
1412 f2[0] = - f1[0] - f3[0] - f4[0];
1413 f2[1] = - f1[1] - f3[1] - f4[1];
1414 f2[2] = - f1[2] - f3[2] - f4[2];
1415
1416 v1[0] = a1[0]*f1[0];
1417 v1[1] = a1[1]*f1[1];
1418 v1[2] = a1[2]*f1[2];
1419 v1[3] = a1[0]*f1[1];
1420 v1[4] = a1[0]*f1[2];
1421 v1[5] = a1[1]*f1[2];
1422 v1[6] = a1[1]*f1[0];
1423 v1[7] = a1[2]*f1[0];
1424 v1[8] = a1[2]*f1[1];
1425
1426 v2[0] = a2[0]*f2[0];
1427 v2[1] = a2[1]*f2[1];
1428 v2[2] = a2[2]*f2[2];
1429 v2[3] = a2[0]*f2[1];
1430 v2[4] = a2[0]*f2[2];
1431 v2[5] = a2[1]*f2[2];
1432 v2[6] = a2[1]*f2[0];
1433 v2[7] = a2[2]*f2[0];
1434 v2[8] = a2[2]*f2[1];
1435
1436 v3[0] = a3[0]*f3[0];
1437 v3[1] = a3[1]*f3[1];
1438 v3[2] = a3[2]*f3[2];
1439 v3[3] = a3[0]*f3[1];
1440 v3[4] = a3[0]*f3[2];
1441 v3[5] = a3[1]*f3[2];
1442 v3[6] = a3[1]*f3[0];
1443 v3[7] = a3[2]*f3[0];
1444 v3[8] = a3[2]*f3[1];
1445
1446 v4[0] = a4[0]*f4[0];
1447 v4[1] = a4[1]*f4[1];
1448 v4[2] = a4[2]*f4[2];
1449 v4[3] = a4[0]*f4[1];
1450 v4[4] = a4[0]*f4[2];
1451 v4[5] = a4[1]*f4[2];
1452 v4[6] = a4[1]*f4[0];
1453 v4[7] = a4[2]*f4[0];
1454 v4[8] = a4[2]*f4[1];
1455
1456 if (newton_bond) {
1457 v_tally9(thr->cvatom_imprp[i1],v1);
1458 v_tally9(thr->cvatom_imprp[i2],v2);
1459 v_tally9(thr->cvatom_imprp[i3],v3);
1460 v_tally9(thr->cvatom_imprp[i4],v4);
1461 } else {
1462 if (i1 < nlocal) v_tally9(thr->cvatom_imprp[i1],v1);
1463 if (i2 < nlocal) v_tally9(thr->cvatom_imprp[i2],v2);
1464 if (i3 < nlocal) v_tally9(thr->cvatom_imprp[i3],v3);
1465 if (i4 < nlocal) v_tally9(thr->cvatom_imprp[i4],v4);
1466 }
1467 }
1468
1469 }
1470
1471 /* ----------------------------------------------------------------------
1472 tally virial into per-atom accumulators
1473 called by AIREBO potential, newton_pair is always on
1474 fpair is magnitude of force on atom I
1475 ------------------------------------------------------------------------- */
1476
v_tally2_thr(Pair * const pair,const int i,const int j,const double fpair,const double * const drij,ThrData * const thr)1477 void ThrOMP::v_tally2_thr(Pair *const pair, const int i, const int j, const double fpair,
1478 const double * const drij, ThrData * const thr)
1479 {
1480 double v[6];
1481
1482 v[0] = drij[0]*drij[0]*fpair;
1483 v[1] = drij[1]*drij[1]*fpair;
1484 v[2] = drij[2]*drij[2]*fpair;
1485 v[3] = drij[0]*drij[1]*fpair;
1486 v[4] = drij[0]*drij[2]*fpair;
1487 v[5] = drij[1]*drij[2]*fpair;
1488 if (pair->vflag_global) v_tally(thr->virial_pair,v);
1489
1490 if (pair->vflag_atom) {
1491 v[0] *= 0.5;
1492 v[1] *= 0.5;
1493 v[2] *= 0.5;
1494 v[3] *= 0.5;
1495 v[4] *= 0.5;
1496 v[5] *= 0.5;
1497 v_tally(thr->vatom_pair[i],v);
1498 v_tally(thr->vatom_pair[j],v);
1499 }
1500 }
1501
1502 /* ----------------------------------------------------------------------
1503 tally virial into per-atom accumulators
1504 called by RexaFF potential, newton_pair is always on
1505 fi is magnitude of force on atom i, deli is the direction
1506 note that the other atom (j) is not updated, due to newton on
1507 ------------------------------------------------------------------------- */
1508
v_tally2_newton_thr(Pair * const pair,const int i,const double * const fi,const double * const deli,ThrData * const thr)1509 void ThrOMP::v_tally2_newton_thr(Pair *const pair, const int i, const double * const fi,
1510 const double * const deli, ThrData * const thr)
1511 {
1512 double v[6];
1513
1514 v[0] = deli[0]*fi[0];
1515 v[1] = deli[1]*fi[1];
1516 v[2] = deli[2]*fi[2];
1517 v[3] = deli[0]*fi[1];
1518 v[4] = deli[0]*fi[2];
1519 v[5] = deli[1]*fi[2];
1520 if (pair->vflag_global) v_tally(thr->virial_pair,v);
1521 if (pair->vflag_atom) v_tally(thr->vatom_pair[i],v);
1522 }
1523
1524 /* ----------------------------------------------------------------------
1525 tally virial into per-atom accumulators
1526 called by AIREBO and Tersoff potential, newton_pair is always on
1527 ------------------------------------------------------------------------- */
1528
v_tally3_thr(Pair * const pair,const int i,const int j,const int k,const double * const fi,const double * const fj,const double * const drik,const double * const drjk,ThrData * const thr)1529 void ThrOMP::v_tally3_thr(Pair *const pair, const int i, const int j, const int k,
1530 const double * const fi, const double * const fj,
1531 const double * const drik, const double * const drjk,
1532 ThrData * const thr)
1533 {
1534 double v[6];
1535
1536 v[0] = (drik[0]*fi[0] + drjk[0]*fj[0]);
1537 v[1] = (drik[1]*fi[1] + drjk[1]*fj[1]);
1538 v[2] = (drik[2]*fi[2] + drjk[2]*fj[2]);
1539 v[3] = (drik[0]*fi[1] + drjk[0]*fj[1]);
1540 v[4] = (drik[0]*fi[2] + drjk[0]*fj[2]);
1541 v[5] = (drik[1]*fi[2] + drjk[1]*fj[2]);
1542 if (pair->vflag_global) v_tally(thr->virial_pair,v);
1543
1544 if (pair->vflag_atom) {
1545 v[0] *= THIRD;
1546 v[1] *= THIRD;
1547 v[2] *= THIRD;
1548 v[3] *= THIRD;
1549 v[4] *= THIRD;
1550 v[5] *= THIRD;
1551 v_tally(thr->vatom_pair[i],v);
1552 v_tally(thr->vatom_pair[j],v);
1553 v_tally(thr->vatom_pair[k],v);
1554 }
1555 }
1556
1557 /* ----------------------------------------------------------------------
1558 tally virial into per-atom accumulators
1559 called by AIREBO potential, newton_pair is always on
1560 ------------------------------------------------------------------------- */
1561
v_tally4_thr(Pair * const pair,const int i,const int j,const int k,const int m,const double * const fi,const double * const fj,const double * const fk,const double * const drim,const double * const drjm,const double * const drkm,ThrData * const thr)1562 void ThrOMP::v_tally4_thr(Pair *const pair, const int i, const int j, const int k, const int m,
1563 const double * const fi, const double * const fj,
1564 const double * const fk, const double * const drim,
1565 const double * const drjm, const double * const drkm,
1566 ThrData * const thr)
1567 {
1568 double v[6];
1569
1570 v[0] = (drim[0]*fi[0] + drjm[0]*fj[0] + drkm[0]*fk[0]);
1571 v[1] = (drim[1]*fi[1] + drjm[1]*fj[1] + drkm[1]*fk[1]);
1572 v[2] = (drim[2]*fi[2] + drjm[2]*fj[2] + drkm[2]*fk[2]);
1573 v[3] = (drim[0]*fi[1] + drjm[0]*fj[1] + drkm[0]*fk[1]);
1574 v[4] = (drim[0]*fi[2] + drjm[0]*fj[2] + drkm[0]*fk[2]);
1575 v[5] = (drim[1]*fi[2] + drjm[1]*fj[2] + drkm[1]*fk[2]);
1576 if (pair->vflag_global) v_tally(thr->virial_pair,v);
1577
1578 if (pair->vflag_atom) {
1579 v[0] *= 0.25;
1580 v[1] *= 0.25;
1581 v[2] *= 0.25;
1582 v[3] *= 0.25;
1583 v[4] *= 0.25;
1584 v[5] *= 0.25;
1585 v_tally(thr->vatom_pair[i],v);
1586 v_tally(thr->vatom_pair[j],v);
1587 v_tally(thr->vatom_pair[k],v);
1588 v_tally(thr->vatom_pair[m],v);
1589 }
1590 }
1591
1592 /* ---------------------------------------------------------------------- */
1593
memory_usage_thr()1594 double ThrOMP::memory_usage_thr()
1595 {
1596 double bytes=0.0;
1597
1598 return bytes;
1599 }
1600