1 /*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2011-2019,2020,2021, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
10 *
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
15 *
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 *
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
33 *
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
36 */
37 /*! \internal \file
38 *
39 * \brief Implements the integrator for normal molecular dynamics simulations
40 *
41 * \author David van der Spoel <david.vanderspoel@icm.uu.se>
42 * \ingroup module_mdrun
43 */
44 #include "gmxpre.h"
45
46 #include <cinttypes>
47 #include <cmath>
48 #include <cstdio>
49 #include <cstdlib>
50
51 #include <algorithm>
52 #include <memory>
53 #include <numeric>
54
55 #include "gromacs/applied_forces/awh/awh.h"
56 #include "gromacs/commandline/filenm.h"
57 #include "gromacs/domdec/collect.h"
58 #include "gromacs/domdec/dlbtiming.h"
59 #include "gromacs/domdec/domdec.h"
60 #include "gromacs/domdec/domdec_network.h"
61 #include "gromacs/domdec/domdec_struct.h"
62 #include "gromacs/domdec/gpuhaloexchange.h"
63 #include "gromacs/domdec/mdsetup.h"
64 #include "gromacs/domdec/partition.h"
65 #include "gromacs/essentialdynamics/edsam.h"
66 #include "gromacs/ewald/pme_load_balancing.h"
67 #include "gromacs/ewald/pme_pp.h"
68 #include "gromacs/fileio/trxio.h"
69 #include "gromacs/gmxlib/network.h"
70 #include "gromacs/gmxlib/nrnb.h"
71 #include "gromacs/gpu_utils/device_stream_manager.h"
72 #include "gromacs/gpu_utils/gpu_utils.h"
73 #include "gromacs/math/units.h"
74 #include "gromacs/imd/imd.h"
75 #include "gromacs/listed_forces/listed_forces.h"
76 #include "gromacs/math/functions.h"
77 #include "gromacs/math/invertmatrix.h"
78 #include "gromacs/math/vec.h"
79 #include "gromacs/math/vectypes.h"
80 #include "gromacs/mdlib/checkpointhandler.h"
81 #include "gromacs/mdlib/compute_io.h"
82 #include "gromacs/mdlib/constr.h"
83 #include "gromacs/mdlib/coupling.h"
84 #include "gromacs/mdlib/ebin.h"
85 #include "gromacs/mdlib/enerdata_utils.h"
86 #include "gromacs/mdlib/energyoutput.h"
87 #include "gromacs/mdlib/expanded.h"
88 #include "gromacs/mdlib/force.h"
89 #include "gromacs/mdlib/force_flags.h"
90 #include "gromacs/mdlib/forcerec.h"
91 #include "gromacs/mdlib/freeenergyparameters.h"
92 #include "gromacs/mdlib/md_support.h"
93 #include "gromacs/mdlib/mdatoms.h"
94 #include "gromacs/mdlib/mdoutf.h"
95 #include "gromacs/mdlib/membed.h"
96 #include "gromacs/mdlib/resethandler.h"
97 #include "gromacs/mdlib/sighandler.h"
98 #include "gromacs/mdlib/simulationsignal.h"
99 #include "gromacs/mdlib/stat.h"
100 #include "gromacs/mdlib/stophandler.h"
101 #include "gromacs/mdlib/tgroup.h"
102 #include "gromacs/mdlib/trajectory_writing.h"
103 #include "gromacs/mdlib/update.h"
104 #include "gromacs/mdlib/update_constrain_gpu.h"
105 #include "gromacs/mdlib/vcm.h"
106 #include "gromacs/mdlib/vsite.h"
107 #include "gromacs/mdrunutility/handlerestart.h"
108 #include "gromacs/mdrunutility/multisim.h"
109 #include "gromacs/mdrunutility/printtime.h"
110 #include "gromacs/mdtypes/awh_history.h"
111 #include "gromacs/mdtypes/awh_params.h"
112 #include "gromacs/mdtypes/commrec.h"
113 #include "gromacs/mdtypes/df_history.h"
114 #include "gromacs/mdtypes/energyhistory.h"
115 #include "gromacs/mdtypes/fcdata.h"
116 #include "gromacs/mdtypes/forcebuffers.h"
117 #include "gromacs/mdtypes/forcerec.h"
118 #include "gromacs/mdtypes/group.h"
119 #include "gromacs/mdtypes/inputrec.h"
120 #include "gromacs/mdtypes/interaction_const.h"
121 #include "gromacs/mdtypes/md_enums.h"
122 #include "gromacs/mdtypes/mdatom.h"
123 #include "gromacs/mdtypes/mdrunoptions.h"
124 #include "gromacs/mdtypes/multipletimestepping.h"
125 #include "gromacs/mdtypes/observableshistory.h"
126 #include "gromacs/mdtypes/pullhistory.h"
127 #include "gromacs/mdtypes/simulation_workload.h"
128 #include "gromacs/mdtypes/state.h"
129 #include "gromacs/mdtypes/state_propagator_data_gpu.h"
130 #include "gromacs/modularsimulator/energydata.h"
131 #include "gromacs/nbnxm/gpu_data_mgmt.h"
132 #include "gromacs/nbnxm/nbnxm.h"
133 #include "gromacs/pbcutil/pbc.h"
134 #include "gromacs/pulling/output.h"
135 #include "gromacs/pulling/pull.h"
136 #include "gromacs/swap/swapcoords.h"
137 #include "gromacs/timing/wallcycle.h"
138 #include "gromacs/timing/walltime_accounting.h"
139 #include "gromacs/topology/atoms.h"
140 #include "gromacs/topology/idef.h"
141 #include "gromacs/topology/mtop_util.h"
142 #include "gromacs/topology/topology.h"
143 #include "gromacs/trajectory/trajectoryframe.h"
144 #include "gromacs/utility/basedefinitions.h"
145 #include "gromacs/utility/cstringutil.h"
146 #include "gromacs/utility/fatalerror.h"
147 #include "gromacs/utility/logger.h"
148 #include "gromacs/utility/real.h"
149 #include "gromacs/utility/smalloc.h"
150
151 #include "legacysimulator.h"
152 #include "replicaexchange.h"
153 #include "shellfc.h"
154
155 /* PLUMED */
156 #include "../../../Plumed.h"
157 extern int plumedswitch;
158 extern plumed plumedmain;
159 /* END PLUMED */
160
161 /* PLUMED HREX */
162 extern int plumed_hrex;
163 /* END PLUMED HREX */
164
165 using gmx::SimulationSignaller;
166
do_md()167 void gmx::LegacySimulator::do_md()
168 {
169 // TODO Historically, the EM and MD "integrators" used different
170 // names for the t_inputrec *parameter, but these must have the
171 // same name, now that it's a member of a struct. We use this ir
172 // alias to avoid a large ripple of nearly useless changes.
173 // t_inputrec is being replaced by IMdpOptionsProvider, so this
174 // will go away eventually.
175 t_inputrec* ir = inputrec;
176 int64_t step, step_rel;
177 double t, t0 = ir->init_t;
178 gmx_bool bGStatEveryStep, bGStat, bCalcVir, bCalcEnerStep, bCalcEner;
179 gmx_bool bNS = FALSE, bNStList, bStopCM, bFirstStep, bInitStep, bLastStep = FALSE;
180 gmx_bool bDoDHDL = FALSE, bDoFEP = FALSE, bDoExpanded = FALSE;
181 gmx_bool do_ene, do_log, do_verbose;
182 gmx_bool bMasterState;
183 unsigned int force_flags;
184 tensor force_vir = { { 0 } }, shake_vir = { { 0 } }, total_vir = { { 0 } }, pres = { { 0 } };
185 int i, m;
186 rvec mu_tot;
187 matrix pressureCouplingMu, M;
188 gmx_repl_ex_t repl_ex = nullptr;
189 gmx_global_stat_t gstat;
190 gmx_shellfc_t* shellfc;
191 gmx_bool bSumEkinhOld, bDoReplEx, bExchanged, bNeedRepartition;
192 gmx_bool bTemp, bPres, bTrotter;
193 real dvdl_constr;
194 std::vector<RVec> cbuf;
195 matrix lastbox;
196 int lamnew = 0;
197 /* for FEP */
198 int nstfep = 0;
199 double cycles;
200 real saved_conserved_quantity = 0;
201 real last_ekin = 0;
202 t_extmass MassQ;
203 char sbuf[STEPSTRSIZE], sbuf2[STEPSTRSIZE];
204
205 /* PME load balancing data for GPU kernels */
206 gmx_bool bPMETune = FALSE;
207 gmx_bool bPMETunePrinting = FALSE;
208
209 bool bInteractiveMDstep = false;
210
211 /* PLUMED */
212 int plumedNeedsEnergy=0;
213 int plumedWantsToStop=0;
214 matrix plumed_vir;
215 real lambdaForce=0;
216 real realFepState=0;
217 /* END PLUMED */
218
219 /* Domain decomposition could incorrectly miss a bonded
220 interaction, but checking for that requires a global
221 communication stage, which does not otherwise happen in DD
222 code. So we do that alongside the first global energy reduction
223 after a new DD is made. These variables handle whether the
224 check happens, and the result it returns. */
225 bool shouldCheckNumberOfBondedInteractions = false;
226 int totalNumberOfBondedInteractions = -1;
227
228 SimulationSignals signals;
229 // Most global communnication stages don't propagate mdrun
230 // signals, and will use this object to achieve that.
231 SimulationSignaller nullSignaller(nullptr, nullptr, nullptr, false, false);
232
233 if (!mdrunOptions.writeConfout)
234 {
235 // This is on by default, and the main known use case for
236 // turning it off is for convenience in benchmarking, which is
237 // something that should not show up in the general user
238 // interface.
239 GMX_LOG(mdlog.info)
240 .asParagraph()
241 .appendText(
242 "The -noconfout functionality is deprecated, and may be removed in a "
243 "future version.");
244 }
245
246 /* md-vv uses averaged full step velocities for T-control
247 md-vv-avek uses averaged half step velocities for T-control (but full step ekin for P control)
248 md uses averaged half step kinetic energies to determine temperature unless defined otherwise by GMX_EKIN_AVE_VEL; */
249 bTrotter = (EI_VV(ir->eI)
250 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir) || inputrecNvtTrotter(ir)));
251
252 const bool bRerunMD = false;
253
254 int nstglobalcomm = computeGlobalCommunicationPeriod(mdlog, ir, cr);
255 bGStatEveryStep = (nstglobalcomm == 1);
256
257 const SimulationGroups* groups = &top_global->groups;
258
259 std::unique_ptr<EssentialDynamics> ed = nullptr;
260 if (opt2bSet("-ei", nfile, fnm))
261 {
262 /* Initialize essential dynamics sampling */
263 ed = init_edsam(mdlog, opt2fn_null("-ei", nfile, fnm), opt2fn("-eo", nfile, fnm), top_global,
264 ir, cr, constr, state_global, observablesHistory, oenv, startingBehavior);
265 }
266 else if (observablesHistory->edsamHistory)
267 {
268 gmx_fatal(FARGS,
269 "The checkpoint is from a run with essential dynamics sampling, "
270 "but the current run did not specify the -ei option. "
271 "Either specify the -ei option to mdrun, or do not use this checkpoint file.");
272 }
273
274 int* fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
275 gmx::ArrayRef<real> lambda = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
276 initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
277 Update upd(*ir, deform);
278 const bool doSimulatedAnnealing = initSimulatedAnnealing(ir, &upd);
279 const bool useReplicaExchange = (replExParams.exchangeInterval > 0);
280
281 const t_fcdata& fcdata = *fr->fcdata;
282
283 bool simulationsShareState = false;
284 int nstSignalComm = nstglobalcomm;
285 {
286 // TODO This implementation of ensemble orientation restraints is nasty because
287 // a user can't just do multi-sim with single-sim orientation restraints.
288 bool usingEnsembleRestraints =
289 (fcdata.disres->nsystems > 1) || ((ms != nullptr) && (fcdata.orires->nr != 0));
290 bool awhUsesMultiSim = (ir->bDoAwh && ir->awhParams->shareBiasMultisim && (ms != nullptr));
291
292 // Replica exchange, ensemble restraints and AWH need all
293 // simulations to remain synchronized, so they need
294 // checkpoints and stop conditions to act on the same step, so
295 // the propagation of such signals must take place between
296 // simulations, not just within simulations.
297 // TODO: Make algorithm initializers set these flags.
298 simulationsShareState = useReplicaExchange || usingEnsembleRestraints || awhUsesMultiSim || (plumedswitch && ms); // PLUMED hack, if we have multiple sim and plumed we usually want them to be in sync
299
300 if (simulationsShareState)
301 {
302 // Inter-simulation signal communication does not need to happen
303 // often, so we use a minimum of 200 steps to reduce overhead.
304 const int c_minimumInterSimulationSignallingInterval = 200;
305 nstSignalComm = ((c_minimumInterSimulationSignallingInterval + nstglobalcomm - 1) / nstglobalcomm)
306 * nstglobalcomm;
307 }
308 }
309
310 if (startingBehavior != StartingBehavior::RestartWithAppending)
311 {
312 pleaseCiteCouplingAlgorithms(fplog, *ir);
313 }
314 gmx_mdoutf* outf =
315 init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider, mdModulesNotifier, ir,
316 top_global, oenv, wcycle, startingBehavior, simulationsShareState, ms);
317 gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, ir, pull_work,
318 mdoutf_get_fp_dhdl(outf), false, startingBehavior,
319 simulationsShareState, mdModulesNotifier);
320
321 gstat = global_stat_init(ir);
322
323 const auto& simulationWork = runScheduleWork->simulationWork;
324 const bool useGpuForPme = simulationWork.useGpuPme;
325 const bool useGpuForNonbonded = simulationWork.useGpuNonbonded;
326 const bool useGpuForBufferOps = simulationWork.useGpuBufferOps;
327 const bool useGpuForUpdate = simulationWork.useGpuUpdate;
328
329 /* Check for polarizable models and flexible constraints */
330 shellfc = init_shell_flexcon(fplog, top_global, constr ? constr->numFlexibleConstraints() : 0,
331 ir->nstcalcenergy, DOMAINDECOMP(cr), useGpuForPme);
332
333 {
334 double io = compute_io(ir, top_global->natoms, *groups, energyOutput.numEnergyTerms(), 1);
335 if ((io > 2000) && MASTER(cr))
336 {
337 fprintf(stderr, "\nWARNING: This run will generate roughly %.0f Mb of data\n\n", io);
338 }
339 }
340
341 // Local state only becomes valid now.
342 std::unique_ptr<t_state> stateInstance;
343 t_state* state;
344
345 gmx_localtop_t top(top_global->ffparams);
346
347 auto mdatoms = mdAtoms->mdatoms();
348
349 ForceBuffers f(fr->useMts, ((useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
350 ? PinningPolicy::PinnedIfSupported
351 : PinningPolicy::CannotBePinned);
352 if (DOMAINDECOMP(cr))
353 {
354 stateInstance = std::make_unique<t_state>();
355 state = stateInstance.get();
356 dd_init_local_state(cr->dd, state_global, state);
357
358 /* Distribute the charge groups over the nodes from the master node */
359 dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir,
360 imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
361 nrnb, nullptr, FALSE);
362 shouldCheckNumberOfBondedInteractions = true;
363 upd.setNumAtoms(state->natoms);
364 }
365 else
366 {
367 state_change_natoms(state_global, state_global->natoms);
368 /* Copy the pointer to the global state */
369 state = state_global;
370
371 /* Generate and initialize new topology */
372 mdAlgorithmsSetupAtomData(cr, ir, *top_global, &top, fr, &f, mdAtoms, constr, vsite, shellfc);
373
374 upd.setNumAtoms(state->natoms);
375 }
376
377 std::unique_ptr<UpdateConstrainGpu> integrator;
378
379 StatePropagatorDataGpu* stateGpu = fr->stateGpu;
380
381 // TODO: the assertions below should be handled by UpdateConstraintsBuilder.
382 if (useGpuForUpdate)
383 {
384 GMX_RELEASE_ASSERT(!DOMAINDECOMP(cr) || ddUsesUpdateGroups(*cr->dd) || constr == nullptr
385 || constr->numConstraintsTotal() == 0,
386 "Constraints in domain decomposition are only supported with update "
387 "groups if using GPU update.\n");
388 GMX_RELEASE_ASSERT(ir->eConstrAlg != econtSHAKE || constr == nullptr
389 || constr->numConstraintsTotal() == 0,
390 "SHAKE is not supported with GPU update.");
391 GMX_RELEASE_ASSERT(useGpuForPme || (useGpuForNonbonded && simulationWork.useGpuBufferOps),
392 "Either PME or short-ranged non-bonded interaction tasks must run on "
393 "the GPU to use GPU update.\n");
394 GMX_RELEASE_ASSERT(ir->eI == eiMD,
395 "Only the md integrator is supported with the GPU update.\n");
396 GMX_RELEASE_ASSERT(
397 ir->etc != etcNOSEHOOVER,
398 "Nose-Hoover temperature coupling is not supported with the GPU update.\n");
399 GMX_RELEASE_ASSERT(
400 ir->epc == epcNO || ir->epc == epcPARRINELLORAHMAN || ir->epc == epcBERENDSEN
401 || ir->epc == epcCRESCALE,
402 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are supported "
403 "with the GPU update.\n");
404 GMX_RELEASE_ASSERT(!mdatoms->haveVsites,
405 "Virtual sites are not supported with the GPU update.\n");
406 GMX_RELEASE_ASSERT(ed == nullptr,
407 "Essential dynamics is not supported with the GPU update.\n");
408 GMX_RELEASE_ASSERT(!ir->bPull || !pull_have_constraint(*ir->pull),
409 "Constraints pulling is not supported with the GPU update.\n");
410 GMX_RELEASE_ASSERT(fcdata.orires->nr == 0,
411 "Orientation restraints are not supported with the GPU update.\n");
412 GMX_RELEASE_ASSERT(
413 ir->efep == efepNO
414 || (!haveFepPerturbedMasses(*top_global) && !havePerturbedConstraints(*top_global)),
415 "Free energy perturbation of masses and constraints are not supported with the GPU "
416 "update.");
417
418 if (constr != nullptr && constr->numConstraintsTotal() > 0)
419 {
420 GMX_LOG(mdlog.info)
421 .asParagraph()
422 .appendText("Updating coordinates and applying constraints on the GPU.");
423 }
424 else
425 {
426 GMX_LOG(mdlog.info).asParagraph().appendText("Updating coordinates on the GPU.");
427 }
428 GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
429 "Device stream manager should be initialized in order to use GPU "
430 "update-constraints.");
431 GMX_RELEASE_ASSERT(
432 fr->deviceStreamManager->streamIsValid(gmx::DeviceStreamType::UpdateAndConstraints),
433 "Update stream should be initialized in order to use GPU "
434 "update-constraints.");
435 integrator = std::make_unique<UpdateConstrainGpu>(
436 *ir, *top_global, fr->deviceStreamManager->context(),
437 fr->deviceStreamManager->stream(gmx::DeviceStreamType::UpdateAndConstraints),
438 stateGpu->xUpdatedOnDevice(), wcycle);
439
440 integrator->setPbc(PbcType::Xyz, state->box);
441 }
442
443 if (useGpuForPme || (useGpuForNonbonded && useGpuForBufferOps) || useGpuForUpdate)
444 {
445 changePinningPolicy(&state->x, PinningPolicy::PinnedIfSupported);
446 }
447 if (useGpuForUpdate)
448 {
449 changePinningPolicy(&state->v, PinningPolicy::PinnedIfSupported);
450 }
451
452 // NOTE: The global state is no longer used at this point.
453 // But state_global is still used as temporary storage space for writing
454 // the global state to file and potentially for replica exchange.
455 // (Global topology should persist.)
456
457 update_mdatoms(mdatoms, state->lambda[efptMASS]);
458
459 if (ir->bExpanded)
460 {
461 /* Check nstexpanded here, because the grompp check was broken */
462 if (ir->expandedvals->nstexpanded % ir->nstcalcenergy != 0)
463 {
464 gmx_fatal(FARGS,
465 "With expanded ensemble, nstexpanded should be a multiple of nstcalcenergy");
466 }
467 init_expanded_ensemble(startingBehavior != StartingBehavior::NewSimulation, ir, state->dfhist);
468 }
469
470 if (MASTER(cr))
471 {
472 EnergyData::initializeEnergyHistory(startingBehavior, observablesHistory, &energyOutput);
473 }
474
475 preparePrevStepPullCom(ir, pull_work, mdatoms->massT, state, state_global, cr,
476 startingBehavior != StartingBehavior::NewSimulation);
477
478 // TODO: Remove this by converting AWH into a ForceProvider
479 auto awh = prepareAwhModule(fplog, *ir, state_global, cr, ms,
480 startingBehavior != StartingBehavior::NewSimulation,
481 shellfc != nullptr, opt2fn("-awh", nfile, fnm), pull_work);
482
483 if (useReplicaExchange && MASTER(cr))
484 {
485 repl_ex = init_replica_exchange(fplog, ms, top_global->natoms, ir, replExParams);
486 }
487 /* PME tuning is only supported in the Verlet scheme, with PME for
488 * Coulomb. It is not supported with only LJ PME. */
489 bPMETune = (mdrunOptions.tunePme && EEL_PME(fr->ic->eeltype) && !mdrunOptions.reproducible
490 && ir->cutoff_scheme != ecutsGROUP);
491
492 pme_load_balancing_t* pme_loadbal = nullptr;
493 if (bPMETune)
494 {
495 pme_loadbal_init(&pme_loadbal, cr, mdlog, *ir, state->box, *fr->ic, *fr->nbv, fr->pmedata,
496 fr->nbv->useGpu());
497 }
498
499 if (!ir->bContinuation)
500 {
501 if (state->flags & (1U << estV))
502 {
503 auto v = makeArrayRef(state->v);
504 /* Set the velocities of vsites, shells and frozen atoms to zero */
505 for (i = 0; i < mdatoms->homenr; i++)
506 {
507 if (mdatoms->ptype[i] == eptVSite || mdatoms->ptype[i] == eptShell)
508 {
509 clear_rvec(v[i]);
510 }
511 else if (mdatoms->cFREEZE)
512 {
513 for (m = 0; m < DIM; m++)
514 {
515 if (ir->opts.nFreeze[mdatoms->cFREEZE[i]][m])
516 {
517 v[i][m] = 0;
518 }
519 }
520 }
521 }
522 }
523
524 if (constr)
525 {
526 /* Constrain the initial coordinates and velocities */
527 do_constrain_first(fplog, constr, ir, mdatoms->nr, mdatoms->homenr,
528 state->x.arrayRefWithPadding(), state->v.arrayRefWithPadding(),
529 state->box, state->lambda[efptBONDED]);
530 }
531 if (vsite)
532 {
533 /* Construct the virtual sites for the initial configuration */
534 vsite->construct(state->x, ir->delta_t, {}, state->box);
535 }
536 }
537
538 if (ir->efep != efepNO)
539 {
540 /* Set free energy calculation frequency as the greatest common
541 * denominator of nstdhdl and repl_ex_nst. */
542 nstfep = ir->fepvals->nstdhdl;
543 if (ir->bExpanded)
544 {
545 nstfep = std::gcd(ir->expandedvals->nstexpanded, nstfep);
546 }
547 if (useReplicaExchange)
548 {
549 nstfep = std::gcd(replExParams.exchangeInterval, nstfep);
550 }
551 if (ir->bDoAwh)
552 {
553 nstfep = std::gcd(ir->awhParams->nstSampleCoord, nstfep);
554 }
555 }
556
557 /* Be REALLY careful about what flags you set here. You CANNOT assume
558 * this is the first step, since we might be restarting from a checkpoint,
559 * and in that case we should not do any modifications to the state.
560 */
561 bStopCM = (ir->comm_mode != ecmNO && !ir->bContinuation);
562
563 // When restarting from a checkpoint, it can be appropriate to
564 // initialize ekind from quantities in the checkpoint. Otherwise,
565 // compute_globals must initialize ekind before the simulation
566 // starts/restarts. However, only the master rank knows what was
567 // found in the checkpoint file, so we have to communicate in
568 // order to coordinate the restart.
569 //
570 // TODO Consider removing this communication if/when checkpoint
571 // reading directly follows .tpr reading, because all ranks can
572 // agree on hasReadEkinState at that time.
573 bool hasReadEkinState = MASTER(cr) ? state_global->ekinstate.hasReadEkinState : false;
574 if (PAR(cr))
575 {
576 gmx_bcast(sizeof(hasReadEkinState), &hasReadEkinState, cr->mpi_comm_mygroup);
577 }
578 if (hasReadEkinState)
579 {
580 restore_ekinstate_from_state(cr, ekind, &state_global->ekinstate);
581 }
582
583 unsigned int cglo_flags =
584 (CGLO_TEMPERATURE | CGLO_GSTAT | (EI_VV(ir->eI) ? CGLO_PRESSURE : 0)
585 | (EI_VV(ir->eI) ? CGLO_CONSTRAINT : 0) | (hasReadEkinState ? CGLO_READEKIN : 0));
586
587 bSumEkinhOld = FALSE;
588
589 t_vcm vcm(top_global->groups, *ir);
590 reportComRemovalInfo(fplog, vcm);
591
592 /* To minimize communication, compute_globals computes the COM velocity
593 * and the kinetic energy for the velocities without COM motion removed.
594 * Thus to get the kinetic energy without the COM contribution, we need
595 * to call compute_globals twice.
596 */
597 for (int cgloIteration = 0; cgloIteration < (bStopCM ? 2 : 1); cgloIteration++)
598 {
599 unsigned int cglo_flags_iteration = cglo_flags;
600 if (bStopCM && cgloIteration == 0)
601 {
602 cglo_flags_iteration |= CGLO_STOPCM;
603 cglo_flags_iteration &= ~CGLO_TEMPERATURE;
604 }
605 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
606 makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr,
607 enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller,
608 state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
609 cglo_flags_iteration
610 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
611 : 0));
612 if (cglo_flags_iteration & CGLO_STOPCM)
613 {
614 /* At initialization, do not pass x with acceleration-correction mode
615 * to avoid (incorrect) correction of the initial coordinates.
616 */
617 auto x = (vcm.mode == ecmLINEAR_ACCELERATION_CORRECTION) ? ArrayRef<RVec>()
618 : makeArrayRef(state->x);
619 process_and_stopcm_grp(fplog, &vcm, *mdatoms, x, makeArrayRef(state->v));
620 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
621 }
622 }
623 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global, &top,
624 makeConstArrayRef(state->x), state->box,
625 &shouldCheckNumberOfBondedInteractions);
626 if (ir->eI == eiVVAK)
627 {
628 /* a second call to get the half step temperature initialized as well */
629 /* we do the same call as above, but turn the pressure off -- internally to
630 compute_globals, this is recognized as a velocity verlet half-step
631 kinetic energy calculation. This minimized excess variables, but
632 perhaps loses some logic?*/
633
634 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
635 makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, nullptr,
636 enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller,
637 state->box, nullptr, &bSumEkinhOld, cglo_flags & ~CGLO_PRESSURE);
638 }
639
640 /* Calculate the initial half step temperature, and save the ekinh_old */
641 if (startingBehavior == StartingBehavior::NewSimulation)
642 {
643 for (i = 0; (i < ir->opts.ngtc); i++)
644 {
645 copy_mat(ekind->tcstat[i].ekinh, ekind->tcstat[i].ekinh_old);
646 }
647 }
648
649 /* need to make an initiation call to get the Trotter variables set, as well as other constants
650 for non-trotter temperature control */
651 auto trotter_seq = init_npt_vars(ir, state, &MassQ, bTrotter);
652
653 if (MASTER(cr))
654 {
655 if (!ir->bContinuation)
656 {
657 if (constr && ir->eConstrAlg == econtLINCS)
658 {
659 fprintf(fplog, "RMS relative constraint deviation after constraining: %.2e\n",
660 constr->rmsd());
661 }
662 if (EI_STATE_VELOCITY(ir->eI))
663 {
664 real temp = enerd->term[F_TEMP];
665 if (ir->eI != eiVV)
666 {
667 /* Result of Ekin averaged over velocities of -half
668 * and +half step, while we only have -half step here.
669 */
670 temp *= 2;
671 }
672 fprintf(fplog, "Initial temperature: %g K\n", temp);
673 }
674 }
675
676 char tbuf[20];
677 fprintf(stderr, "starting mdrun '%s'\n", *(top_global->name));
678 if (ir->nsteps >= 0)
679 {
680 sprintf(tbuf, "%8.1f", (ir->init_step + ir->nsteps) * ir->delta_t);
681 }
682 else
683 {
684 sprintf(tbuf, "%s", "infinite");
685 }
686 if (ir->init_step > 0)
687 {
688 fprintf(stderr, "%s steps, %s ps (continuing from step %s, %8.1f ps).\n",
689 gmx_step_str(ir->init_step + ir->nsteps, sbuf), tbuf,
690 gmx_step_str(ir->init_step, sbuf2), ir->init_step * ir->delta_t);
691 }
692 else
693 {
694 fprintf(stderr, "%s steps, %s ps.\n", gmx_step_str(ir->nsteps, sbuf), tbuf);
695 }
696 fprintf(fplog, "\n");
697 }
698
699 /* PLUMED */
700 if(plumedswitch){
701 /* detect plumed API version */
702 int pversion=0;
703 plumed_cmd(plumedmain,"getApiVersion",&pversion);
704 /* setting kbT is only implemented with api>1) */
705 real kbT=ir->opts.ref_t[0]*BOLTZ;
706 if(pversion>1) plumed_cmd(plumedmain,"setKbT",&kbT);
707 if(pversion>2){
708 int res=1;
709 if( (startingBehavior != StartingBehavior::NewSimulation) ) plumed_cmd(plumedmain,"setRestart",&res);
710 }
711
712 if(ms && ms->numSimulations_>1) {
713 if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&ms->mastersComm_);
714 if(PAR(cr)){
715 if(DOMAINDECOMP(cr)) {
716 plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all);
717 }else{
718 plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim);
719 }
720 }
721 plumed_cmd(plumedmain,"GREX init",nullptr);
722 }
723 if(PAR(cr)){
724 if(DOMAINDECOMP(cr)) {
725 plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all);
726 }
727 }
728 plumed_cmd(plumedmain,"setNatoms",&top_global->natoms);
729 plumed_cmd(plumedmain,"setMDEngine","gromacs");
730 plumed_cmd(plumedmain,"setLog",fplog);
731 real real_delta_t=ir->delta_t;
732 plumed_cmd(plumedmain,"setTimestep",&real_delta_t);
733 plumed_cmd(plumedmain,"init",nullptr);
734
735 if(PAR(cr)){
736 if(DOMAINDECOMP(cr)) {
737 int nat_home = dd_numHomeAtoms(*cr->dd);
738 plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home);
739 plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data());
740 }
741 }
742 realFepState = state->fep_state;
743 plumed_cmd(plumedmain, "setExtraCV lambda", &realFepState);
744 plumed_cmd(plumedmain, "setExtraCVForce lambda", &lambdaForce);
745 }
746 /* END PLUMED */
747
748 walltime_accounting_start_time(walltime_accounting);
749 wallcycle_start(wcycle, ewcRUN);
750 print_start(fplog, cr, walltime_accounting, "mdrun");
751
752 /***********************************************************
753 *
754 * Loop over MD steps
755 *
756 ************************************************************/
757
758 bFirstStep = TRUE;
759 /* Skip the first Nose-Hoover integration when we get the state from tpx */
760 bInitStep = startingBehavior == StartingBehavior::NewSimulation || EI_VV(ir->eI);
761 bSumEkinhOld = FALSE;
762 bExchanged = FALSE;
763 bNeedRepartition = FALSE;
764
765 step = ir->init_step;
766 step_rel = 0;
767
768 auto stopHandler = stopHandlerBuilder->getStopHandlerMD(
769 compat::not_null<SimulationSignal*>(&signals[eglsSTOPCOND]), simulationsShareState,
770 MASTER(cr), ir->nstlist, mdrunOptions.reproducible, nstSignalComm,
771 mdrunOptions.maximumHoursToRun, ir->nstlist == 0, fplog, step, bNS, walltime_accounting);
772
773 auto checkpointHandler = std::make_unique<CheckpointHandler>(
774 compat::make_not_null<SimulationSignal*>(&signals[eglsCHKPT]), simulationsShareState,
775 ir->nstlist == 0, MASTER(cr), mdrunOptions.writeConfout,
776 mdrunOptions.checkpointOptions.period);
777
778 const bool resetCountersIsLocal = true;
779 auto resetHandler = std::make_unique<ResetHandler>(
780 compat::make_not_null<SimulationSignal*>(&signals[eglsRESETCOUNTERS]),
781 !resetCountersIsLocal, ir->nsteps, MASTER(cr), mdrunOptions.timingOptions.resetHalfway,
782 mdrunOptions.maximumHoursToRun, mdlog, wcycle, walltime_accounting);
783
784 const DDBalanceRegionHandler ddBalanceRegionHandler(cr);
785
786 if (MASTER(cr) && isMultiSim(ms) && !useReplicaExchange)
787 {
788 logInitialMultisimStatus(ms, cr, mdlog, simulationsShareState, ir->nsteps, ir->init_step);
789 }
790
791 /* and stop now if we should */
792 bLastStep = (bLastStep || (ir->nsteps >= 0 && step_rel > ir->nsteps));
793 while (!bLastStep)
794 {
795
796 /* Determine if this is a neighbor search step */
797 bNStList = (ir->nstlist > 0 && step % ir->nstlist == 0);
798
799 if (bPMETune && bNStList)
800 {
801 // This has to be here because PME load balancing is called so early.
802 // TODO: Move to after all booleans are defined.
803 if (useGpuForUpdate && !bFirstStep)
804 {
805 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
806 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
807 }
808 /* PME grid + cut-off optimization with GPUs or PME nodes */
809 pme_loadbal_do(pme_loadbal, cr, (mdrunOptions.verbose && MASTER(cr)) ? stderr : nullptr,
810 fplog, mdlog, *ir, fr, state->box, state->x, wcycle, step, step_rel,
811 &bPMETunePrinting, simulationWork.useGpuPmePpCommunication);
812 }
813
814 wallcycle_start(wcycle, ewcSTEP);
815
816 bLastStep = (step_rel == ir->nsteps);
817 t = t0 + step * ir->delta_t;
818
819 // TODO Refactor this, so that nstfep does not need a default value of zero
820 if (ir->efep != efepNO || ir->bSimTemp)
821 {
822 /* find and set the current lambdas */
823 state->lambda = currentLambdas(step, *(ir->fepvals), state->fep_state);
824
825 bDoDHDL = do_per_step(step, ir->fepvals->nstdhdl);
826 bDoFEP = ((ir->efep != efepNO) && do_per_step(step, nstfep));
827 bDoExpanded = (do_per_step(step, ir->expandedvals->nstexpanded) && (ir->bExpanded)
828 && (!bFirstStep));
829 }
830
831 bDoReplEx = (useReplicaExchange && (step > 0) && !bLastStep
832 && do_per_step(step, replExParams.exchangeInterval));
833
834 if (doSimulatedAnnealing)
835 {
836 update_annealing_target_temp(ir, t, &upd);
837 }
838
839 /* Stop Center of Mass motion */
840 bStopCM = (ir->comm_mode != ecmNO && do_per_step(step, ir->nstcomm));
841
842 /* Determine whether or not to do Neighbour Searching */
843 bNS = (bFirstStep || bNStList || bExchanged || bNeedRepartition);
844
845 /* Note that the stopHandler will cause termination at nstglobalcomm
846 * steps. Since this concides with nstcalcenergy, nsttcouple and/or
847 * nstpcouple steps, we have computed the half-step kinetic energy
848 * of the previous step and can always output energies at the last step.
849 */
850 bLastStep = bLastStep || stopHandler->stoppingAfterCurrentStep(bNS);
851
852 /* do_log triggers energy and virial calculation. Because this leads
853 * to different code paths, forces can be different. Thus for exact
854 * continuation we should avoid extra log output.
855 * Note that the || bLastStep can result in non-exact continuation
856 * beyond the last step. But we don't consider that to be an issue.
857 */
858 do_log = (do_per_step(step, ir->nstlog)
859 || (bFirstStep && startingBehavior == StartingBehavior::NewSimulation) || bLastStep);
860 do_verbose = mdrunOptions.verbose
861 && (step % mdrunOptions.verboseStepPrintInterval == 0 || bFirstStep || bLastStep);
862
863 if (useGpuForUpdate && !bFirstStep && bNS)
864 {
865 // Copy velocities from the GPU on search steps to keep a copy on host (device buffers are reinitialized).
866 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
867 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
868 // Copy coordinate from the GPU when needed at the search step.
869 // NOTE: The cases when coordinates needed on CPU for force evaluation are handled in sim_utils.
870 // NOTE: If the coordinates are to be written into output file they are also copied separately before the output.
871 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
872 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
873 }
874
875 if (bNS && !(bFirstStep && ir->bContinuation))
876 {
877 bMasterState = FALSE;
878 /* Correct the new box if it is too skewed */
879 if (inputrecDynamicBox(ir))
880 {
881 if (correct_box(fplog, step, state->box))
882 {
883 bMasterState = TRUE;
884 // If update is offloaded, it should be informed about the box size change
885 if (useGpuForUpdate)
886 {
887 integrator->setPbc(PbcType::Xyz, state->box);
888 }
889 }
890 }
891 if (DOMAINDECOMP(cr) && bMasterState)
892 {
893 dd_collect_state(cr->dd, state, state_global);
894 }
895
896 if (DOMAINDECOMP(cr))
897 {
898 /* Repartition the domain decomposition */
899 dd_partition_system(fplog, mdlog, step, cr, bMasterState, nstglobalcomm, state_global,
900 *top_global, ir, imdSession, pull_work, state, &f, mdAtoms, &top,
901 fr, vsite, constr, nrnb, wcycle, do_verbose && !bPMETunePrinting);
902 shouldCheckNumberOfBondedInteractions = true;
903 upd.setNumAtoms(state->natoms);
904
905 /* PLUMED */
906 if(plumedswitch){
907 int nat_home = dd_numHomeAtoms(*cr->dd);
908 plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home);
909 plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data());
910 }
911 /* END PLUMED */
912 }
913 }
914
915 // Allocate or re-size GPU halo exchange object, if necessary
916 if (bNS && havePPDomainDecomposition(cr) && simulationWork.useGpuHaloExchange)
917 {
918 GMX_RELEASE_ASSERT(fr->deviceStreamManager != nullptr,
919 "GPU device manager has to be initialized to use GPU "
920 "version of halo exchange.");
921 constructGpuHaloExchange(mdlog, *cr, *fr->deviceStreamManager, wcycle);
922 }
923
924 if (MASTER(cr) && do_log)
925 {
926 gmx::EnergyOutput::printHeader(fplog, step,
927 t); /* can we improve the information printed here? */
928 }
929
930 if (ir->efep != efepNO)
931 {
932 update_mdatoms(mdatoms, state->lambda[efptMASS]);
933 }
934
935 if (bExchanged)
936 {
937
938 /* We need the kinetic energy at minus the half step for determining
939 * the full step kinetic energy and possibly for T-coupling.*/
940 /* This may not be quite working correctly yet . . . . */
941 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
942 makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle,
943 enerd, nullptr, nullptr, nullptr, nullptr, constr, &nullSignaller,
944 state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
945 CGLO_GSTAT | CGLO_TEMPERATURE | CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS);
946 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions, top_global,
947 &top, makeConstArrayRef(state->x), state->box,
948 &shouldCheckNumberOfBondedInteractions);
949 }
950 clear_mat(force_vir);
951
952 /* PLUMED HREX */
953 gmx_bool bHREX = bDoReplEx && plumed_hrex;
954
955 if (plumedswitch && bHREX) {
956 // gmx_enerdata_t *hrex_enerd;
957 int nlambda = enerd->foreignLambdaTerms.numLambdas();
958 gmx_enerdata_t hrex_enerd(enerd->grpp.nener, nlambda == 0 ? 0 : nlambda - 1);
959 int repl = -1;
960 int nrepl = -1;
961 if (MASTER(cr)){
962 repl = replica_exchange_get_repl(repl_ex);
963 nrepl = replica_exchange_get_nrepl(repl_ex);
964 }
965
966 if (DOMAINDECOMP(cr)) {
967 dd_collect_state(cr->dd,state,state_global);
968 } else {
969 copy_state_serial(state, state_global);
970 }
971
972 if(MASTER(cr)){
973 if(repl%2==step/replExParams.exchangeInterval%2){
974 if(repl-1>=0) exchange_state(ms,repl-1,state_global);
975 }else{
976 if(repl+1<nrepl) exchange_state(ms,repl+1,state_global);
977 }
978 }
979 if (!DOMAINDECOMP(cr)) {
980 copy_state_serial(state_global, state);
981 }
982 if(PAR(cr)){
983 if (DOMAINDECOMP(cr)) {
984 dd_partition_system(fplog,mdlog,step,cr,TRUE,1,
985 state_global,*top_global,ir,
986 imdSession, pull_work,
987 state,&f,mdAtoms,&top,fr,vsite,constr,
988 nrnb,wcycle,FALSE);
989 }
990 }
991 do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
992 nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
993 &f.view(), force_vir, mdatoms, &hrex_enerd, state->lambda,
994 fr, runScheduleWork, vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
995 GMX_FORCE_STATECHANGED |
996 GMX_FORCE_DYNAMICBOX |
997 GMX_FORCE_ALLFORCES |
998 GMX_FORCE_VIRIAL |
999 GMX_FORCE_ENERGY |
1000 GMX_FORCE_DHDL |
1001 GMX_FORCE_NS,
1002 ddBalanceRegionHandler);
1003
1004 plumed_cmd(plumedmain,"GREX cacheLocalUSwap",&(&hrex_enerd)->term[F_EPOT]);
1005
1006 /* exchange back */
1007 if (DOMAINDECOMP(cr)) {
1008 dd_collect_state(cr->dd,state,state_global);
1009 } else {
1010 copy_state_serial(state, state_global);
1011 }
1012
1013 if(MASTER(cr)){
1014 if(repl%2==step/replExParams.exchangeInterval%2){
1015 if(repl-1>=0) exchange_state(ms,repl-1,state_global);
1016 }else{
1017 if(repl+1<nrepl) exchange_state(ms,repl+1,state_global);
1018 }
1019 }
1020
1021 if (!DOMAINDECOMP(cr)) {
1022 copy_state_serial(state_global, state);
1023 }
1024 if(PAR(cr)){
1025 if (DOMAINDECOMP(cr)) {
1026 dd_partition_system(fplog,mdlog,step,cr,TRUE,1,
1027 state_global,*top_global,ir,
1028 imdSession, pull_work,
1029 state,&f,mdAtoms,&top,fr,vsite,constr,
1030 nrnb,wcycle,FALSE);
1031 int nat_home = dd_numHomeAtoms(*cr->dd);
1032 plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home);
1033 plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data());
1034 }
1035 }
1036 bNS=true;
1037 }
1038 /* END PLUMED HREX */
1039
1040 checkpointHandler->decideIfCheckpointingThisStep(bNS, bFirstStep, bLastStep);
1041
1042 /* Determine the energy and pressure:
1043 * at nstcalcenergy steps and at energy output steps (set below).
1044 */
1045 if (EI_VV(ir->eI) && (!bInitStep))
1046 {
1047 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1048 bCalcVir = bCalcEnerStep
1049 || (ir->epc != epcNO
1050 && (do_per_step(step, ir->nstpcouple) || do_per_step(step - 1, ir->nstpcouple)));
1051 }
1052 else
1053 {
1054 bCalcEnerStep = do_per_step(step, ir->nstcalcenergy);
1055 bCalcVir = bCalcEnerStep || (ir->epc != epcNO && do_per_step(step, ir->nstpcouple));
1056 }
1057 bCalcEner = bCalcEnerStep;
1058
1059 do_ene = (do_per_step(step, ir->nstenergy) || bLastStep);
1060
1061 if (do_ene || do_log || bDoReplEx)
1062 {
1063 bCalcVir = TRUE;
1064 bCalcEner = TRUE;
1065 }
1066
1067 /* Do we need global communication ? */
1068 bGStat = (bCalcVir || bCalcEner || bStopCM || do_per_step(step, nstglobalcomm)
1069 || (EI_VV(ir->eI) && inputrecNvtTrotter(ir) && do_per_step(step - 1, nstglobalcomm)));
1070
1071 force_flags = (GMX_FORCE_STATECHANGED | ((inputrecDynamicBox(ir)) ? GMX_FORCE_DYNAMICBOX : 0)
1072 | GMX_FORCE_ALLFORCES | (bCalcVir ? GMX_FORCE_VIRIAL : 0)
1073 | (bCalcEner ? GMX_FORCE_ENERGY : 0) | (bDoFEP ? GMX_FORCE_DHDL : 0));
1074 if (fr->useMts && !do_per_step(step, ir->nstfout))
1075 {
1076 force_flags |= GMX_FORCE_DO_NOT_NEED_NORMAL_FORCE;
1077 }
1078
1079 if (shellfc)
1080 {
1081 /* Now is the time to relax the shells */
1082 relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, enforcedRotation, step, ir,
1083 imdSession, pull_work, bNS, force_flags, &top, constr, enerd,
1084 state->natoms, state->x.arrayRefWithPadding(),
1085 state->v.arrayRefWithPadding(), state->box, state->lambda,
1086 &state->hist, &f.view(), force_vir, mdatoms, nrnb, wcycle, shellfc,
1087 fr, runScheduleWork, t, mu_tot, vsite, ddBalanceRegionHandler);
1088 }
1089 else
1090 {
1091 /* The AWH history need to be saved _before_ doing force calculations where the AWH bias
1092 is updated (or the AWH update will be performed twice for one step when continuing).
1093 It would be best to call this update function from do_md_trajectory_writing but that
1094 would occur after do_force. One would have to divide the update_awh function into one
1095 function applying the AWH force and one doing the AWH bias update. The update AWH
1096 bias function could then be called after do_md_trajectory_writing (then containing
1097 update_awh_history). The checkpointing will in the future probably moved to the start
1098 of the md loop which will rid of this issue. */
1099 if (awh && checkpointHandler->isCheckpointingStep() && MASTER(cr))
1100 {
1101 awh->updateHistory(state_global->awhHistory.get());
1102 }
1103
1104 /* The coordinates (x) are shifted (to get whole molecules)
1105 * in do_force.
1106 * This is parallellized as well, and does communication too.
1107 * Check comments in sim_util.c
1108 */
1109
1110 /* PLUMED */
1111 plumedNeedsEnergy=0;
1112 if(plumedswitch){
1113 int pversion=0;
1114 plumed_cmd(plumedmain,"getApiVersion",&pversion);
1115 long int lstep=step; plumed_cmd(plumedmain,"setStepLong",&lstep);
1116 plumed_cmd(plumedmain,"setPositions",&state->x[0][0]);
1117 plumed_cmd(plumedmain,"setMasses",&mdatoms->massT[0]);
1118 plumed_cmd(plumedmain,"setCharges",&mdatoms->chargeA[0]);
1119 plumed_cmd(plumedmain,"setBox",&state->box[0][0]);
1120 plumed_cmd(plumedmain,"prepareCalc",nullptr);
1121 plumed_cmd(plumedmain,"setStopFlag",&plumedWantsToStop);
1122 int checkp=0; if(checkpointHandler->isCheckpointingStep()) checkp=1;
1123 if(pversion>3) plumed_cmd(plumedmain,"doCheckPoint",&checkp);
1124 plumed_cmd(plumedmain,"setForces",&f.view().force()[0][0]);
1125 plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
1126 if(plumedNeedsEnergy) force_flags |= GMX_FORCE_ENERGY | GMX_FORCE_VIRIAL;
1127 clear_mat(plumed_vir);
1128 plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]);
1129 }
1130 /* END PLUMED */
1131 do_force(fplog, cr, ms, ir, awh.get(), enforcedRotation, imdSession, pull_work, step,
1132 nrnb, wcycle, &top, state->box, state->x.arrayRefWithPadding(), &state->hist,
1133 &f.view(), force_vir, mdatoms, enerd, state->lambda, fr, runScheduleWork,
1134 vsite, mu_tot, t, ed ? ed->getLegacyED() : nullptr,
1135 (bNS ? GMX_FORCE_NS : 0) | force_flags, ddBalanceRegionHandler);
1136 /* PLUMED */
1137 if(plumedswitch){
1138 if(plumedNeedsEnergy){
1139 msmul(force_vir,2.0,plumed_vir);
1140 plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]);
1141 plumed_cmd(plumedmain,"performCalc",nullptr);
1142 msmul(plumed_vir,0.5,force_vir);
1143 } else {
1144 msmul(plumed_vir,0.5,plumed_vir);
1145 m_add(force_vir,plumed_vir,force_vir);
1146 }
1147 if(bDoReplEx) plumed_cmd(plumedmain,"GREX savePositions",nullptr);
1148 if(plumedWantsToStop) ir->nsteps=step_rel+1;
1149 if(bHREX) plumed_cmd(plumedmain,"GREX cacheLocalUNow",&enerd->term[F_EPOT]);
1150 }
1151 /* END PLUMED */
1152 }
1153
1154 // VV integrators do not need the following velocity half step
1155 // if it is the first step after starting from a checkpoint.
1156 // That is, the half step is needed on all other steps, and
1157 // also the first step when starting from a .tpr file.
1158 if (EI_VV(ir->eI) && (!bFirstStep || startingBehavior == StartingBehavior::NewSimulation))
1159 /* ############### START FIRST UPDATE HALF-STEP FOR VV METHODS############### */
1160 {
1161 rvec* vbuf = nullptr;
1162
1163 wallcycle_start(wcycle, ewcUPDATE);
1164 if (ir->eI == eiVV && bInitStep)
1165 {
1166 /* if using velocity verlet with full time step Ekin,
1167 * take the first half step only to compute the
1168 * virial for the first step. From there,
1169 * revert back to the initial coordinates
1170 * so that the input is actually the initial step.
1171 */
1172 snew(vbuf, state->natoms);
1173 copy_rvecn(state->v.rvec_array(), vbuf, 0,
1174 state->natoms); /* should make this better for parallelizing? */
1175 }
1176 else
1177 {
1178 /* this is for NHC in the Ekin(t+dt/2) version of vv */
1179 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
1180 trotter_seq, ettTSEQ1);
1181 }
1182
1183 upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind,
1184 M, etrtVELOCITY1, cr, constr != nullptr);
1185
1186 wallcycle_stop(wcycle, ewcUPDATE);
1187 constrain_velocities(constr, do_log, do_ene, step, state, nullptr, bCalcVir, shake_vir);
1188 wallcycle_start(wcycle, ewcUPDATE);
1189 /* if VV, compute the pressure and constraints */
1190 /* For VV2, we strictly only need this if using pressure
1191 * control, but we really would like to have accurate pressures
1192 * printed out.
1193 * Think about ways around this in the future?
1194 * For now, keep this choice in comments.
1195 */
1196 /*bPres = (ir->eI==eiVV || inputrecNptTrotter(ir)); */
1197 /*bTemp = ((ir->eI==eiVV &&(!bInitStep)) || (ir->eI==eiVVAK && inputrecNptTrotter(ir)));*/
1198 bPres = TRUE;
1199 bTemp = ((ir->eI == eiVV && (!bInitStep)) || (ir->eI == eiVVAK));
1200 if (bCalcEner && ir->eI == eiVVAK)
1201 {
1202 bSumEkinhOld = TRUE;
1203 }
1204 /* for vv, the first half of the integration actually corresponds to the previous step.
1205 So we need information from the last step in the first half of the integration */
1206 if (bGStat || do_per_step(step - 1, nstglobalcomm))
1207 {
1208 wallcycle_stop(wcycle, ewcUPDATE);
1209 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1210 makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle,
1211 enerd, force_vir, shake_vir, total_vir, pres, constr, &nullSignaller,
1212 state->box, &totalNumberOfBondedInteractions, &bSumEkinhOld,
1213 (bGStat ? CGLO_GSTAT : 0) | (bCalcEner ? CGLO_ENERGY : 0)
1214 | (bTemp ? CGLO_TEMPERATURE : 0) | (bPres ? CGLO_PRESSURE : 0)
1215 | (bPres ? CGLO_CONSTRAINT : 0) | (bStopCM ? CGLO_STOPCM : 0)
1216 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1217 : 0)
1218 | CGLO_SCALEEKIN);
1219 /* explanation of above:
1220 a) We compute Ekin at the full time step
1221 if 1) we are using the AveVel Ekin, and it's not the
1222 initial step, or 2) if we are using AveEkin, but need the full
1223 time step kinetic energy for the pressure (always true now, since we want accurate statistics).
1224 b) If we are using EkinAveEkin for the kinetic energy for the temperature control, we still feed in
1225 EkinAveVel because it's needed for the pressure */
1226 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1227 top_global, &top, makeConstArrayRef(state->x),
1228 state->box, &shouldCheckNumberOfBondedInteractions);
1229 if (bStopCM)
1230 {
1231 process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
1232 makeArrayRef(state->v));
1233 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1234 }
1235 wallcycle_start(wcycle, ewcUPDATE);
1236 }
1237 /* temperature scaling and pressure scaling to produce the extended variables at t+dt */
1238 if (!bInitStep)
1239 {
1240 if (bTrotter)
1241 {
1242 m_add(force_vir, shake_vir,
1243 total_vir); /* we need the un-dispersion corrected total vir here */
1244 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ,
1245 trotter_seq, ettTSEQ2);
1246
1247 /* TODO This is only needed when we're about to write
1248 * a checkpoint, because we use it after the restart
1249 * (in a kludge?). But what should we be doing if
1250 * the startingBehavior is NewSimulation or bInitStep are true? */
1251 if (inputrecNptTrotter(ir) || inputrecNphTrotter(ir))
1252 {
1253 copy_mat(shake_vir, state->svir_prev);
1254 copy_mat(force_vir, state->fvir_prev);
1255 }
1256 if ((inputrecNptTrotter(ir) || inputrecNvtTrotter(ir)) && ir->eI == eiVV)
1257 {
1258 /* update temperature and kinetic energy now that step is over - this is the v(t+dt) point */
1259 enerd->term[F_TEMP] =
1260 sum_ekin(&(ir->opts), ekind, nullptr, (ir->eI == eiVV), FALSE);
1261 enerd->term[F_EKIN] = trace(ekind->ekin);
1262 }
1263 }
1264 else if (bExchanged)
1265 {
1266 wallcycle_stop(wcycle, ewcUPDATE);
1267 /* We need the kinetic energy at minus the half step for determining
1268 * the full step kinetic energy and possibly for T-coupling.*/
1269 /* This may not be quite working correctly yet . . . . */
1270 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1271 makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle,
1272 enerd, nullptr, nullptr, nullptr, nullptr, constr, &nullSignaller,
1273 state->box, nullptr, &bSumEkinhOld, CGLO_GSTAT | CGLO_TEMPERATURE);
1274 wallcycle_start(wcycle, ewcUPDATE);
1275 }
1276 }
1277 /* if it's the initial step, we performed this first step just to get the constraint virial */
1278 if (ir->eI == eiVV && bInitStep)
1279 {
1280 copy_rvecn(vbuf, state->v.rvec_array(), 0, state->natoms);
1281 sfree(vbuf);
1282 }
1283 wallcycle_stop(wcycle, ewcUPDATE);
1284 }
1285
1286 /* compute the conserved quantity */
1287 if (EI_VV(ir->eI))
1288 {
1289 saved_conserved_quantity = NPT_energy(ir, state, &MassQ);
1290 if (ir->eI == eiVV)
1291 {
1292 last_ekin = enerd->term[F_EKIN];
1293 }
1294 if ((ir->eDispCorr != edispcEnerPres) && (ir->eDispCorr != edispcAllEnerPres))
1295 {
1296 saved_conserved_quantity -= enerd->term[F_DISPCORR];
1297 }
1298 /* sum up the foreign kinetic energy and dK/dl terms for vv. currently done every step so that dhdl is correct in the .edr */
1299 if (ir->efep != efepNO)
1300 {
1301 accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
1302 }
1303 }
1304
1305 /* ######## END FIRST UPDATE STEP ############## */
1306 /* ######## If doing VV, we now have v(dt) ###### */
1307 if (bDoExpanded)
1308 {
1309 /* perform extended ensemble sampling in lambda - we don't
1310 actually move to the new state before outputting
1311 statistics, but if performing simulated tempering, we
1312 do update the velocities and the tau_t. */
1313
1314 lamnew = ExpandedEnsembleDynamics(fplog, ir, enerd, state, &MassQ, state->fep_state,
1315 state->dfhist, step, state->v.rvec_array(), mdatoms, &realFepState);
1316 /* history is maintained in state->dfhist, but state_global is what is sent to trajectory and log output */
1317 if (MASTER(cr))
1318 {
1319 copy_df_history(state_global->dfhist, state->dfhist);
1320 }
1321 }
1322
1323 // Copy coordinate from the GPU for the output/checkpointing if the update is offloaded and
1324 // coordinates have not already been copied for i) search or ii) CPU force tasks.
1325 if (useGpuForUpdate && !bNS && !runScheduleWork->domainWork.haveCpuLocalForceWork
1326 && (do_per_step(step, ir->nstxout) || do_per_step(step, ir->nstxout_compressed)
1327 || checkpointHandler->isCheckpointingStep()))
1328 {
1329 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1330 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1331 }
1332 // Copy velocities if needed for the output/checkpointing.
1333 // NOTE: Copy on the search steps is done at the beginning of the step.
1334 if (useGpuForUpdate && !bNS
1335 && (do_per_step(step, ir->nstvout) || checkpointHandler->isCheckpointingStep()))
1336 {
1337 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1338 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1339 }
1340 // Copy forces for the output if the forces were reduced on the GPU (not the case on virial steps)
1341 // and update is offloaded hence forces are kept on the GPU for update and have not been
1342 // already transferred in do_force().
1343 // TODO: There should be an improved, explicit mechanism that ensures this copy is only executed
1344 // when the forces are ready on the GPU -- the same synchronizer should be used as the one
1345 // prior to GPU update.
1346 // TODO: When the output flags will be included in step workload, this copy can be combined with the
1347 // copy call in do_force(...).
1348 // NOTE: The forces should not be copied here if the vsites are present, since they were modified
1349 // on host after the D2H copy in do_force(...).
1350 if (runScheduleWork->stepWork.useGpuFBufferOps && (simulationWork.useGpuUpdate && !vsite)
1351 && do_per_step(step, ir->nstfout))
1352 {
1353 stateGpu->copyForcesFromGpu(f.view().force(), AtomLocality::Local);
1354 stateGpu->waitForcesReadyOnHost(AtomLocality::Local);
1355 }
1356 /* Now we have the energies and forces corresponding to the
1357 * coordinates at time t. We must output all of this before
1358 * the update.
1359 */
1360 do_md_trajectory_writing(fplog, cr, nfile, fnm, step, step_rel, t, ir, state, state_global,
1361 observablesHistory, top_global, fr, outf, energyOutput, ekind,
1362 f.view().force(), checkpointHandler->isCheckpointingStep(),
1363 bRerunMD, bLastStep, mdrunOptions.writeConfout, bSumEkinhOld);
1364 /* Check if IMD step and do IMD communication, if bIMD is TRUE. */
1365 bInteractiveMDstep = imdSession->run(step, bNS, state->box, state->x.rvec_array(), t);
1366
1367 /* kludge -- virial is lost with restart for MTTK NPT control. Must reload (saved earlier). */
1368 if (startingBehavior != StartingBehavior::NewSimulation && bFirstStep
1369 && (inputrecNptTrotter(ir) || inputrecNphTrotter(ir)))
1370 {
1371 copy_mat(state->svir_prev, shake_vir);
1372 copy_mat(state->fvir_prev, force_vir);
1373 }
1374
1375 stopHandler->setSignal();
1376 resetHandler->setSignal(walltime_accounting);
1377
1378 if (bGStat || !PAR(cr))
1379 {
1380 /* In parallel we only have to check for checkpointing in steps
1381 * where we do global communication,
1382 * otherwise the other nodes don't know.
1383 */
1384 checkpointHandler->setSignal(walltime_accounting);
1385 }
1386
1387 /* ######### START SECOND UPDATE STEP ################# */
1388
1389 /* at the start of step, randomize or scale the velocities ((if vv. Restriction of Andersen
1390 controlled in preprocessing */
1391
1392 if (ETC_ANDERSEN(ir->etc)) /* keep this outside of update_tcouple because of the extra info required to pass */
1393 {
1394 gmx_bool bIfRandomize;
1395 bIfRandomize = update_randomize_velocities(ir, step, cr, mdatoms, state->v, &upd, constr);
1396 /* if we have constraints, we have to remove the kinetic energy parallel to the bonds */
1397 if (constr && bIfRandomize)
1398 {
1399 constrain_velocities(constr, do_log, do_ene, step, state, nullptr, false, nullptr);
1400 }
1401 }
1402 /* Box is changed in update() when we do pressure coupling,
1403 * but we should still use the old box for energy corrections and when
1404 * writing it to the energy file, so it matches the trajectory files for
1405 * the same timestep above. Make a copy in a separate array.
1406 */
1407 copy_mat(state->box, lastbox);
1408
1409 dvdl_constr = 0;
1410
1411 if (!useGpuForUpdate)
1412 {
1413 wallcycle_start(wcycle, ewcUPDATE);
1414 }
1415 /* UPDATE PRESSURE VARIABLES IN TROTTER FORMULATION WITH CONSTRAINTS */
1416 if (bTrotter)
1417 {
1418 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ3);
1419 /* We can only do Berendsen coupling after we have summed
1420 * the kinetic energy or virial. Since the happens
1421 * in global_state after update, we should only do it at
1422 * step % nstlist = 1 with bGStatEveryStep=FALSE.
1423 */
1424 }
1425 else
1426 {
1427 update_tcouple(step, ir, state, ekind, &MassQ, mdatoms);
1428 update_pcouple_before_coordinates(fplog, step, ir, state, pressureCouplingMu, M, bInitStep);
1429 }
1430
1431 if (EI_VV(ir->eI))
1432 {
1433 /* velocity half-step update */
1434 upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind,
1435 M, etrtVELOCITY2, cr, constr != nullptr);
1436 }
1437
1438 /* Above, initialize just copies ekinh into ekin,
1439 * it doesn't copy position (for VV),
1440 * and entire integrator for MD.
1441 */
1442
1443 if (ir->eI == eiVVAK)
1444 {
1445 cbuf.resize(state->x.size());
1446 std::copy(state->x.begin(), state->x.end(), cbuf.begin());
1447 }
1448
1449 /* With leap-frog type integrators we compute the kinetic energy
1450 * at a whole time step as the average of the half-time step kinetic
1451 * energies of two subsequent steps. Therefore we need to compute the
1452 * half step kinetic energy also if we need energies at the next step.
1453 */
1454 const bool needHalfStepKineticEnergy =
1455 (!EI_VV(ir->eI) && (do_per_step(step + 1, nstglobalcomm) || step_rel + 1 == ir->nsteps));
1456
1457 // Parrinello-Rahman requires the pressure to be availible before the update to compute
1458 // the velocity scaling matrix. Hence, it runs one step after the nstpcouple step.
1459 const bool doParrinelloRahman = (ir->epc == epcPARRINELLORAHMAN
1460 && do_per_step(step + ir->nstpcouple - 1, ir->nstpcouple));
1461
1462 if (useGpuForUpdate)
1463 {
1464 if (bNS && (bFirstStep || DOMAINDECOMP(cr)))
1465 {
1466 integrator->set(stateGpu->getCoordinates(), stateGpu->getVelocities(),
1467 stateGpu->getForces(), top.idef, *mdatoms, ekind->ngtc);
1468
1469 // Copy data to the GPU after buffers might have being reinitialized
1470 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1471 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1472 }
1473
1474 if (simulationWork.useGpuPme && !runScheduleWork->simulationWork.useGpuPmePpCommunication
1475 && !thisRankHasDuty(cr, DUTY_PME))
1476 {
1477 // The PME forces were recieved to the host, so have to be copied
1478 stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::All);
1479 }
1480 else if (!runScheduleWork->stepWork.useGpuFBufferOps)
1481 {
1482 // The buffer ops were not offloaded this step, so the forces are on the
1483 // host and have to be copied
1484 stateGpu->copyForcesToGpu(f.view().force(), AtomLocality::Local);
1485 }
1486
1487 const bool doTemperatureScaling =
1488 (ir->etc != etcNO && do_per_step(step + ir->nsttcouple - 1, ir->nsttcouple));
1489
1490 // This applies Leap-Frog, LINCS and SETTLE in succession
1491 integrator->integrate(stateGpu->getForcesReadyOnDeviceEvent(
1492 AtomLocality::Local, runScheduleWork->stepWork.useGpuFBufferOps),
1493 ir->delta_t, true, bCalcVir, shake_vir, doTemperatureScaling,
1494 ekind->tcstat, doParrinelloRahman, ir->nstpcouple * ir->delta_t, M);
1495
1496 // Copy velocities D2H after update if:
1497 // - Globals are computed this step (includes the energy output steps).
1498 // - Temperature is needed for the next step.
1499 if (bGStat || needHalfStepKineticEnergy)
1500 {
1501 stateGpu->copyVelocitiesFromGpu(state->v, AtomLocality::Local);
1502 stateGpu->waitVelocitiesReadyOnHost(AtomLocality::Local);
1503 }
1504 }
1505 else
1506 {
1507 /* With multiple time stepping we need to do an additional normal
1508 * update step to obtain the virial, as the actual MTS integration
1509 * using an acceleration where the slow forces are multiplied by mtsFactor.
1510 * Using that acceleration would result in a virial with the slow
1511 * force contribution would be a factor mtsFactor too large.
1512 */
1513 if (fr->useMts && bCalcVir && constr != nullptr)
1514 {
1515 upd.update_for_constraint_virial(*ir, *mdatoms, *state, f.view().forceWithPadding(), *ekind);
1516
1517 constrain_coordinates(constr, do_log, do_ene, step, state,
1518 upd.xp()->arrayRefWithPadding(), &dvdl_constr, bCalcVir, shake_vir);
1519 }
1520
1521 ArrayRefWithPadding<const RVec> forceCombined =
1522 (fr->useMts && step % ir->mtsLevels[1].stepFactor == 0)
1523 ? f.view().forceMtsCombinedWithPadding()
1524 : f.view().forceWithPadding();
1525 upd.update_coords(*ir, step, mdatoms, state, forceCombined, fcdata, ekind, M,
1526 etrtPOSITION, cr, constr != nullptr);
1527
1528 wallcycle_stop(wcycle, ewcUPDATE);
1529
1530 constrain_coordinates(constr, do_log, do_ene, step, state, upd.xp()->arrayRefWithPadding(),
1531 &dvdl_constr, bCalcVir && !fr->useMts, shake_vir);
1532
1533 upd.update_sd_second_half(*ir, step, &dvdl_constr, mdatoms, state, cr, nrnb, wcycle,
1534 constr, do_log, do_ene);
1535 upd.finish_update(*ir, mdatoms, state, wcycle, constr != nullptr);
1536 }
1537
1538 if (ir->bPull && ir->pull->bSetPbcRefToPrevStepCOM)
1539 {
1540 updatePrevStepPullCom(pull_work, state);
1541 }
1542
1543 if (ir->eI == eiVVAK)
1544 {
1545 /* erase F_EKIN and F_TEMP here? */
1546 /* just compute the kinetic energy at the half step to perform a trotter step */
1547 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1548 makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm, wcycle, enerd,
1549 force_vir, shake_vir, total_vir, pres, constr, &nullSignaller, lastbox,
1550 nullptr, &bSumEkinhOld, (bGStat ? CGLO_GSTAT : 0) | CGLO_TEMPERATURE);
1551 wallcycle_start(wcycle, ewcUPDATE);
1552 trotter_update(ir, step, ekind, enerd, state, total_vir, mdatoms, &MassQ, trotter_seq, ettTSEQ4);
1553 /* now we know the scaling, we can compute the positions again */
1554 std::copy(cbuf.begin(), cbuf.end(), state->x.begin());
1555
1556 upd.update_coords(*ir, step, mdatoms, state, f.view().forceWithPadding(), fcdata, ekind,
1557 M, etrtPOSITION, cr, constr != nullptr);
1558 wallcycle_stop(wcycle, ewcUPDATE);
1559
1560 /* do we need an extra constraint here? just need to copy out of as_rvec_array(state->v.data()) to upd->xp? */
1561 /* are the small terms in the shake_vir here due
1562 * to numerical errors, or are they important
1563 * physically? I'm thinking they are just errors, but not completely sure.
1564 * For now, will call without actually constraining, constr=nullptr*/
1565 upd.finish_update(*ir, mdatoms, state, wcycle, false);
1566 }
1567 if (EI_VV(ir->eI))
1568 {
1569 /* this factor or 2 correction is necessary
1570 because half of the constraint force is removed
1571 in the vv step, so we have to double it. See
1572 the Issue #1255. It is not yet clear
1573 if the factor of 2 is exact, or just a very
1574 good approximation, and this will be
1575 investigated. The next step is to see if this
1576 can be done adding a dhdl contribution from the
1577 rattle step, but this is somewhat more
1578 complicated with the current code. Will be
1579 investigated, hopefully for 4.6.3. However,
1580 this current solution is much better than
1581 having it completely wrong.
1582 */
1583 enerd->term[F_DVDL_CONSTR] += 2 * dvdl_constr;
1584 }
1585 else
1586 {
1587 enerd->term[F_DVDL_CONSTR] += dvdl_constr;
1588 }
1589
1590 if (vsite != nullptr)
1591 {
1592 wallcycle_start(wcycle, ewcVSITECONSTR);
1593 vsite->construct(state->x, ir->delta_t, state->v, state->box);
1594 wallcycle_stop(wcycle, ewcVSITECONSTR);
1595 }
1596
1597 /* ############## IF NOT VV, Calculate globals HERE ############ */
1598 /* With Leap-Frog we can skip compute_globals at
1599 * non-communication steps, but we need to calculate
1600 * the kinetic energy one step before communication.
1601 */
1602 {
1603 // Organize to do inter-simulation signalling on steps if
1604 // and when algorithms require it.
1605 const bool doInterSimSignal = (simulationsShareState && do_per_step(step, nstSignalComm));
1606
1607 if (bGStat || needHalfStepKineticEnergy || doInterSimSignal)
1608 {
1609 // Copy coordinates when needed to stop the CM motion.
1610 if (useGpuForUpdate && !EI_VV(ir->eI) && bStopCM)
1611 {
1612 stateGpu->copyCoordinatesFromGpu(state->x, AtomLocality::Local);
1613 stateGpu->waitCoordinatesReadyOnHost(AtomLocality::Local);
1614 }
1615 // Since we're already communicating at this step, we
1616 // can propagate intra-simulation signals. Note that
1617 // check_nstglobalcomm has the responsibility for
1618 // choosing the value of nstglobalcomm that is one way
1619 // bGStat becomes true, so we can't get into a
1620 // situation where e.g. checkpointing can't be
1621 // signalled.
1622 bool doIntraSimSignal = true;
1623 SimulationSignaller signaller(&signals, cr, ms, doInterSimSignal, doIntraSimSignal);
1624
1625 compute_globals(gstat, cr, ir, fr, ekind, makeConstArrayRef(state->x),
1626 makeConstArrayRef(state->v), state->box, mdatoms, nrnb, &vcm,
1627 wcycle, enerd, force_vir, shake_vir, total_vir, pres, constr,
1628 &signaller, lastbox, &totalNumberOfBondedInteractions, &bSumEkinhOld,
1629 (bGStat ? CGLO_GSTAT : 0) | (!EI_VV(ir->eI) && bCalcEner ? CGLO_ENERGY : 0)
1630 | (!EI_VV(ir->eI) && bStopCM ? CGLO_STOPCM : 0)
1631 | (!EI_VV(ir->eI) ? CGLO_TEMPERATURE : 0)
1632 | (!EI_VV(ir->eI) ? CGLO_PRESSURE : 0) | CGLO_CONSTRAINT
1633 | (shouldCheckNumberOfBondedInteractions ? CGLO_CHECK_NUMBER_OF_BONDED_INTERACTIONS
1634 : 0));
1635 checkNumberOfBondedInteractions(mdlog, cr, totalNumberOfBondedInteractions,
1636 top_global, &top, makeConstArrayRef(state->x),
1637 state->box, &shouldCheckNumberOfBondedInteractions);
1638 if (!EI_VV(ir->eI) && bStopCM)
1639 {
1640 process_and_stopcm_grp(fplog, &vcm, *mdatoms, makeArrayRef(state->x),
1641 makeArrayRef(state->v));
1642 inc_nrnb(nrnb, eNR_STOPCM, mdatoms->homenr);
1643
1644 // TODO: The special case of removing CM motion should be dealt more gracefully
1645 if (useGpuForUpdate)
1646 {
1647 stateGpu->copyCoordinatesToGpu(state->x, AtomLocality::Local);
1648 // Here we block until the H2D copy completes because event sync with the
1649 // force kernels that use the coordinates on the next steps is not implemented
1650 // (not because of a race on state->x being modified on the CPU while H2D is in progress).
1651 stateGpu->waitCoordinatesCopiedToDevice(AtomLocality::Local);
1652 // If the COM removal changed the velocities on the CPU, this has to be accounted for.
1653 if (vcm.mode != ecmNO)
1654 {
1655 stateGpu->copyVelocitiesToGpu(state->v, AtomLocality::Local);
1656 }
1657 }
1658 }
1659 }
1660 }
1661
1662 /* ############# END CALC EKIN AND PRESSURE ################# */
1663
1664 /* Note: this is OK, but there are some numerical precision issues with using the convergence of
1665 the virial that should probably be addressed eventually. state->veta has better properies,
1666 but what we actually need entering the new cycle is the new shake_vir value. Ideally, we could
1667 generate the new shake_vir, but test the veta value for convergence. This will take some thought. */
1668
1669 if (ir->efep != efepNO && !EI_VV(ir->eI))
1670 {
1671 /* Sum up the foreign energy and dK/dl terms for md and sd.
1672 Currently done every step so that dH/dl is correct in the .edr */
1673 accumulateKineticLambdaComponents(enerd, state->lambda, *ir->fepvals);
1674 }
1675
1676 update_pcouple_after_coordinates(fplog, step, ir, mdatoms, pres, force_vir, shake_vir,
1677 pressureCouplingMu, state, nrnb, upd.deform(), !useGpuForUpdate);
1678
1679 const bool doBerendsenPressureCoupling =
1680 (inputrec->epc == epcBERENDSEN && do_per_step(step, inputrec->nstpcouple));
1681 const bool doCRescalePressureCoupling =
1682 (inputrec->epc == epcCRESCALE && do_per_step(step, inputrec->nstpcouple));
1683 if (useGpuForUpdate
1684 && (doBerendsenPressureCoupling || doCRescalePressureCoupling || doParrinelloRahman))
1685 {
1686 integrator->scaleCoordinates(pressureCouplingMu);
1687 if (doCRescalePressureCoupling)
1688 {
1689 matrix pressureCouplingInvMu;
1690 gmx::invertBoxMatrix(pressureCouplingMu, pressureCouplingInvMu);
1691 integrator->scaleVelocities(pressureCouplingInvMu);
1692 }
1693 integrator->setPbc(PbcType::Xyz, state->box);
1694 }
1695
1696 /* ################# END UPDATE STEP 2 ################# */
1697 /* #### We now have r(t+dt) and v(t+dt/2) ############# */
1698
1699 /* The coordinates (x) were unshifted in update */
1700 if (!bGStat)
1701 {
1702 /* We will not sum ekinh_old,
1703 * so signal that we still have to do it.
1704 */
1705 bSumEkinhOld = TRUE;
1706 }
1707
1708 if (bCalcEner)
1709 {
1710 /* ######### BEGIN PREPARING EDR OUTPUT ########### */
1711
1712 /* use the directly determined last velocity, not actually the averaged half steps */
1713 if (bTrotter && ir->eI == eiVV)
1714 {
1715 enerd->term[F_EKIN] = last_ekin;
1716 }
1717 enerd->term[F_ETOT] = enerd->term[F_EPOT] + enerd->term[F_EKIN];
1718
1719 if (integratorHasConservedEnergyQuantity(ir))
1720 {
1721 if (EI_VV(ir->eI))
1722 {
1723 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + saved_conserved_quantity;
1724 }
1725 else
1726 {
1727 enerd->term[F_ECONSERVED] = enerd->term[F_ETOT] + NPT_energy(ir, state, &MassQ);
1728 }
1729 }
1730 /* ######### END PREPARING EDR OUTPUT ########### */
1731 }
1732
1733 /* Output stuff */
1734 if (MASTER(cr))
1735 {
1736 if (fplog && do_log && bDoExpanded)
1737 {
1738 /* only needed if doing expanded ensemble */
1739 PrintFreeEnergyInfoToFile(fplog, ir->fepvals, ir->expandedvals,
1740 ir->bSimTemp ? ir->simtempvals : nullptr,
1741 state_global->dfhist, state->fep_state, ir->nstlog, step);
1742 }
1743 if (bCalcEner)
1744 {
1745 energyOutput.addDataAtEnergyStep(
1746 bDoDHDL, bCalcEnerStep, t, mdatoms->tmass, enerd, ir->fepvals,
1747 ir->expandedvals, lastbox,
1748 PTCouplingArrays{ state->boxv, state->nosehoover_xi, state->nosehoover_vxi,
1749 state->nhpres_xi, state->nhpres_vxi },
1750 state->fep_state, shake_vir, force_vir, total_vir, pres, ekind, mu_tot, constr);
1751 }
1752 else
1753 {
1754 energyOutput.recordNonEnergyStep();
1755 }
1756
1757 gmx_bool do_dr = do_per_step(step, ir->nstdisreout);
1758 gmx_bool do_or = do_per_step(step, ir->nstorireout);
1759
1760 if (doSimulatedAnnealing)
1761 {
1762 gmx::EnergyOutput::printAnnealingTemperatures(do_log ? fplog : nullptr, groups,
1763 &(ir->opts));
1764 }
1765 if (do_log || do_ene || do_dr || do_or)
1766 {
1767 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, do_dr, do_or,
1768 do_log ? fplog : nullptr, step, t,
1769 fr->fcdata.get(), awh.get());
1770 }
1771 if (do_log && ir->bDoAwh && awh->hasFepLambdaDimension())
1772 {
1773 const bool isInitialOutput = false;
1774 printLambdaStateToLog(fplog, state->lambda, isInitialOutput);
1775 }
1776
1777 if (ir->bPull)
1778 {
1779 pull_print_output(pull_work, step, t);
1780 }
1781
1782 if (do_per_step(step, ir->nstlog))
1783 {
1784 if (fflush(fplog) != 0)
1785 {
1786 gmx_fatal(FARGS, "Cannot flush logfile - maybe you are out of disk space?");
1787 }
1788 }
1789 }
1790 if (bDoExpanded)
1791 {
1792 /* Have to do this part _after_ outputting the logfile and the edr file */
1793 /* Gets written into the state at the beginning of next loop*/
1794 state->fep_state = lamnew;
1795 if(plumedswitch)
1796 {
1797 realFepState = state->fep_state;
1798 }
1799 }
1800 else if (ir->bDoAwh && awh->needForeignEnergyDifferences(step))
1801 {
1802 state->fep_state = awh->fepLambdaState();
1803 }
1804 /* Print the remaining wall clock time for the run */
1805 if (isMasterSimMasterRank(ms, MASTER(cr)) && (do_verbose || gmx_got_usr_signal()) && !bPMETunePrinting)
1806 {
1807 if (shellfc)
1808 {
1809 fprintf(stderr, "\n");
1810 }
1811 print_time(stderr, walltime_accounting, step, ir, cr);
1812 }
1813
1814 /* Ion/water position swapping.
1815 * Not done in last step since trajectory writing happens before this call
1816 * in the MD loop and exchanges would be lost anyway. */
1817 bNeedRepartition = FALSE;
1818 if ((ir->eSwapCoords != eswapNO) && (step > 0) && !bLastStep && do_per_step(step, ir->swap->nstswap))
1819 {
1820 bNeedRepartition =
1821 do_swapcoords(cr, step, t, ir, swap, wcycle, as_rvec_array(state->x.data()),
1822 state->box, MASTER(cr) && mdrunOptions.verbose, bRerunMD);
1823
1824 if (bNeedRepartition && DOMAINDECOMP(cr))
1825 {
1826 dd_collect_state(cr->dd, state, state_global);
1827 }
1828 }
1829
1830 /* Replica exchange */
1831 bExchanged = FALSE;
1832 if (bDoReplEx)
1833 {
1834 bExchanged = replica_exchange(fplog, cr, ms, repl_ex, state_global, enerd, state, step, t);
1835 }
1836
1837 if ((bExchanged || bNeedRepartition) && DOMAINDECOMP(cr))
1838 {
1839 dd_partition_system(fplog, mdlog, step, cr, TRUE, 1, state_global, *top_global, ir,
1840 imdSession, pull_work, state, &f, mdAtoms, &top, fr, vsite, constr,
1841 nrnb, wcycle, FALSE);
1842 shouldCheckNumberOfBondedInteractions = true;
1843 upd.setNumAtoms(state->natoms);
1844 }
1845
1846 bFirstStep = FALSE;
1847 bInitStep = FALSE;
1848
1849 /* ####### SET VARIABLES FOR NEXT ITERATION IF THEY STILL NEED IT ###### */
1850 /* With all integrators, except VV, we need to retain the pressure
1851 * at the current step for coupling at the next step.
1852 */
1853 if ((state->flags & (1U << estPRES_PREV))
1854 && (bGStatEveryStep || (ir->nstpcouple > 0 && step % ir->nstpcouple == 0)))
1855 {
1856 /* Store the pressure in t_state for pressure coupling
1857 * at the next MD step.
1858 */
1859 copy_mat(pres, state->pres_prev);
1860 }
1861
1862 /* ####### END SET VARIABLES FOR NEXT ITERATION ###### */
1863
1864 if ((membed != nullptr) && (!bLastStep))
1865 {
1866 rescale_membed(step_rel, membed, as_rvec_array(state_global->x.data()));
1867 }
1868
1869 cycles = wallcycle_stop(wcycle, ewcSTEP);
1870 if (DOMAINDECOMP(cr) && wcycle)
1871 {
1872 dd_cycles_add(cr->dd, cycles, ddCyclStep);
1873 }
1874
1875 /* increase the MD step number */
1876 step++;
1877 step_rel++;
1878
1879 #if GMX_FAHCORE
1880 if (MASTER(cr))
1881 {
1882 fcReportProgress(ir->nsteps + ir->init_step, step);
1883 }
1884 #endif
1885
1886 resetHandler->resetCounters(step, step_rel, mdlog, fplog, cr, fr->nbv.get(), nrnb,
1887 fr->pmedata, pme_loadbal, wcycle, walltime_accounting);
1888
1889 /* If bIMD is TRUE, the master updates the IMD energy record and sends positions to VMD client */
1890 imdSession->updateEnergyRecordAndSendPositionsAndEnergies(bInteractiveMDstep, step, bCalcEner);
1891 }
1892 /* End of main MD loop */
1893
1894 /* Closing TNG files can include compressing data. Therefore it is good to do that
1895 * before stopping the time measurements. */
1896 mdoutf_tng_close(outf);
1897
1898 /* Stop measuring walltime */
1899 walltime_accounting_end_time(walltime_accounting);
1900
1901 if (!thisRankHasDuty(cr, DUTY_PME))
1902 {
1903 /* Tell the PME only node to finish */
1904 gmx_pme_send_finish(cr);
1905 }
1906
1907 if (MASTER(cr))
1908 {
1909 if (ir->nstcalcenergy > 0)
1910 {
1911 energyOutput.printEnergyConservation(fplog, ir->simulation_part, EI_MD(ir->eI));
1912
1913 gmx::EnergyOutput::printAnnealingTemperatures(fplog, groups, &(ir->opts));
1914 energyOutput.printAverages(fplog, groups);
1915 }
1916 }
1917 done_mdoutf(outf);
1918
1919 if (bPMETune)
1920 {
1921 pme_loadbal_done(pme_loadbal, fplog, mdlog, fr->nbv->useGpu());
1922 }
1923
1924 done_shellfc(fplog, shellfc, step_rel);
1925
1926 if (useReplicaExchange && MASTER(cr))
1927 {
1928 print_replica_exchange_statistics(fplog, repl_ex);
1929 }
1930
1931 walltime_accounting_set_nsteps_done(walltime_accounting, step_rel);
1932
1933 global_stat_destroy(gstat);
1934 }
1935