1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015,2016,2017 The GROMACS development team.
7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9  * and including many others, as listed in the AUTHORS file in the
10  * top-level source directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 /*! \internal \file
39  *
40  * \brief This file defines integrators for energy minimization
41  *
42  * \author Berk Hess <hess@kth.se>
43  * \author Erik Lindahl <erik@kth.se>
44  * \ingroup module_mdrun
45  */
46 #include "gmxpre.h"
47 
48 #include "config.h"
49 
50 #include <cmath>
51 #include <cstring>
52 #include <ctime>
53 
54 #include <algorithm>
55 #include <vector>
56 
57 #include "gromacs/commandline/filenm.h"
58 #include "gromacs/domdec/collect.h"
59 #include "gromacs/domdec/dlbtiming.h"
60 #include "gromacs/domdec/domdec.h"
61 #include "gromacs/domdec/domdec_struct.h"
62 #include "gromacs/domdec/mdsetup.h"
63 #include "gromacs/domdec/partition.h"
64 #include "gromacs/ewald/pme_pp.h"
65 #include "gromacs/fileio/confio.h"
66 #include "gromacs/fileio/mtxio.h"
67 #include "gromacs/gmxlib/network.h"
68 #include "gromacs/gmxlib/nrnb.h"
69 #include "gromacs/imd/imd.h"
70 #include "gromacs/linearalgebra/sparsematrix.h"
71 #include "gromacs/listed_forces/listed_forces.h"
72 #include "gromacs/math/functions.h"
73 #include "gromacs/math/vec.h"
74 #include "gromacs/mdlib/constr.h"
75 #include "gromacs/mdlib/coupling.h"
76 #include "gromacs/mdlib/dispersioncorrection.h"
77 #include "gromacs/mdlib/ebin.h"
78 #include "gromacs/mdlib/enerdata_utils.h"
79 #include "gromacs/mdlib/energyoutput.h"
80 #include "gromacs/mdlib/force.h"
81 #include "gromacs/mdlib/force_flags.h"
82 #include "gromacs/mdlib/forcerec.h"
83 #include "gromacs/mdlib/gmx_omp_nthreads.h"
84 #include "gromacs/mdlib/md_support.h"
85 #include "gromacs/mdlib/mdatoms.h"
86 #include "gromacs/mdlib/stat.h"
87 #include "gromacs/mdlib/tgroup.h"
88 #include "gromacs/mdlib/trajectory_writing.h"
89 #include "gromacs/mdlib/update.h"
90 #include "gromacs/mdlib/vsite.h"
91 #include "gromacs/mdrunutility/handlerestart.h"
92 #include "gromacs/mdrunutility/multisim.h" /*PLUMED*/
93 #include "gromacs/mdrunutility/printtime.h"
94 #include "gromacs/mdtypes/checkpointdata.h"
95 #include "gromacs/mdtypes/commrec.h"
96 #include "gromacs/mdtypes/forcebuffers.h"
97 #include "gromacs/mdtypes/forcerec.h"
98 #include "gromacs/mdtypes/inputrec.h"
99 #include "gromacs/mdtypes/interaction_const.h"
100 #include "gromacs/mdtypes/md_enums.h"
101 #include "gromacs/mdtypes/mdatom.h"
102 #include "gromacs/mdtypes/mdrunoptions.h"
103 #include "gromacs/mdtypes/state.h"
104 #include "gromacs/pbcutil/pbc.h"
105 #include "gromacs/timing/wallcycle.h"
106 #include "gromacs/timing/walltime_accounting.h"
107 #include "gromacs/topology/mtop_util.h"
108 #include "gromacs/topology/topology.h"
109 #include "gromacs/utility/cstringutil.h"
110 #include "gromacs/utility/exceptions.h"
111 #include "gromacs/utility/fatalerror.h"
112 #include "gromacs/utility/logger.h"
113 #include "gromacs/utility/smalloc.h"
114 
115 #include "legacysimulator.h"
116 #include "shellfc.h"
117 
118 using gmx::ArrayRef;
119 using gmx::MdrunScheduleWorkload;
120 using gmx::RVec;
121 using gmx::VirtualSitesHandler;
122 
123 /* PLUMED */
124 #include "../../../Plumed.h"
125 extern int    plumedswitch;
126 extern plumed plumedmain;
127 /* END PLUMED */
128 
129 //! Utility structure for manipulating states during EM
130 typedef struct em_state
131 {
132     //! Copy of the global state
133     t_state s;
134     //! Force array
135     gmx::ForceBuffers f;
136     //! Potential energy
137     real epot;
138     //! Norm of the force
139     real fnorm;
140     //! Maximum force
141     real fmax;
142     //! Direction
143     int a_fmax;
144 } em_state_t;
145 
146 //! Print the EM starting conditions
print_em_start(FILE * fplog,const t_commrec * cr,gmx_walltime_accounting_t walltime_accounting,gmx_wallcycle_t wcycle,const char * name)147 static void print_em_start(FILE*                     fplog,
148                            const t_commrec*          cr,
149                            gmx_walltime_accounting_t walltime_accounting,
150                            gmx_wallcycle_t           wcycle,
151                            const char*               name)
152 {
153     walltime_accounting_start_time(walltime_accounting);
154     wallcycle_start(wcycle, ewcRUN);
155     print_start(fplog, cr, walltime_accounting, name);
156 }
157 
158 //! Stop counting time for EM
em_time_end(gmx_walltime_accounting_t walltime_accounting,gmx_wallcycle_t wcycle)159 static void em_time_end(gmx_walltime_accounting_t walltime_accounting, gmx_wallcycle_t wcycle)
160 {
161     wallcycle_stop(wcycle, ewcRUN);
162 
163     walltime_accounting_end_time(walltime_accounting);
164 }
165 
166 //! Printing a log file and console header
sp_header(FILE * out,const char * minimizer,real ftol,int nsteps)167 static void sp_header(FILE* out, const char* minimizer, real ftol, int nsteps)
168 {
169     fprintf(out, "\n");
170     fprintf(out, "%s:\n", minimizer);
171     fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
172     fprintf(out, "   Number of steps    = %12d\n", nsteps);
173 }
174 
175 //! Print warning message
warn_step(FILE * fp,real ftol,real fmax,gmx_bool bLastStep,gmx_bool bConstrain)176 static void warn_step(FILE* fp, real ftol, real fmax, gmx_bool bLastStep, gmx_bool bConstrain)
177 {
178     constexpr bool realIsDouble = GMX_DOUBLE;
179     char           buffer[2048];
180 
181     if (!std::isfinite(fmax))
182     {
183         sprintf(buffer,
184                 "\nEnergy minimization has stopped because the force "
185                 "on at least one atom is not finite. This usually means "
186                 "atoms are overlapping. Modify the input coordinates to "
187                 "remove atom overlap or use soft-core potentials with "
188                 "the free energy code to avoid infinite forces.\n%s",
189                 !realIsDouble ? "You could also be lucky that switching to double precision "
190                                 "is sufficient to obtain finite forces.\n"
191                               : "");
192     }
193     else if (bLastStep)
194     {
195         sprintf(buffer,
196                 "\nEnergy minimization reached the maximum number "
197                 "of steps before the forces reached the requested "
198                 "precision Fmax < %g.\n",
199                 ftol);
200     }
201     else
202     {
203         sprintf(buffer,
204                 "\nEnergy minimization has stopped, but the forces have "
205                 "not converged to the requested precision Fmax < %g (which "
206                 "may not be possible for your system). It stopped "
207                 "because the algorithm tried to make a new step whose size "
208                 "was too small, or there was no change in the energy since "
209                 "last step. Either way, we regard the minimization as "
210                 "converged to within the available machine precision, "
211                 "given your starting configuration and EM parameters.\n%s%s",
212                 ftol,
213                 !realIsDouble ? "\nDouble precision normally gives you higher accuracy, but "
214                                 "this is often not needed for preparing to run molecular "
215                                 "dynamics.\n"
216                               : "",
217                 bConstrain ? "You might need to increase your constraint accuracy, or turn\n"
218                              "off constraints altogether (set constraints = none in mdp file)\n"
219                            : "");
220     }
221 
222     fputs(wrap_lines(buffer, 78, 0, FALSE), stderr);
223     fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
224 }
225 
226 //! Print message about convergence of the EM
print_converged(FILE * fp,const char * alg,real ftol,int64_t count,gmx_bool bDone,int64_t nsteps,const em_state_t * ems,double sqrtNumAtoms)227 static void print_converged(FILE*             fp,
228                             const char*       alg,
229                             real              ftol,
230                             int64_t           count,
231                             gmx_bool          bDone,
232                             int64_t           nsteps,
233                             const em_state_t* ems,
234                             double            sqrtNumAtoms)
235 {
236     char buf[STEPSTRSIZE];
237 
238     if (bDone)
239     {
240         fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n", alg, ftol, gmx_step_str(count, buf));
241     }
242     else if (count < nsteps)
243     {
244         fprintf(fp,
245                 "\n%s converged to machine precision in %s steps,\n"
246                 "but did not reach the requested Fmax < %g.\n",
247                 alg, gmx_step_str(count, buf), ftol);
248     }
249     else
250     {
251         fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n", alg, ftol,
252                 gmx_step_str(count, buf));
253     }
254 
255 #if GMX_DOUBLE
256     fprintf(fp, "Potential Energy  = %21.14e\n", ems->epot);
257     fprintf(fp, "Maximum force     = %21.14e on atom %d\n", ems->fmax, ems->a_fmax + 1);
258     fprintf(fp, "Norm of force     = %21.14e\n", ems->fnorm / sqrtNumAtoms);
259 #else
260     fprintf(fp, "Potential Energy  = %14.7e\n", ems->epot);
261     fprintf(fp, "Maximum force     = %14.7e on atom %d\n", ems->fmax, ems->a_fmax + 1);
262     fprintf(fp, "Norm of force     = %14.7e\n", ems->fnorm / sqrtNumAtoms);
263 #endif
264 }
265 
266 //! Compute the norm and max of the force array in parallel
get_f_norm_max(const t_commrec * cr,t_grpopts * opts,t_mdatoms * mdatoms,gmx::ArrayRef<const gmx::RVec> f,real * fnorm,real * fmax,int * a_fmax)267 static void get_f_norm_max(const t_commrec*               cr,
268                            t_grpopts*                     opts,
269                            t_mdatoms*                     mdatoms,
270                            gmx::ArrayRef<const gmx::RVec> f,
271                            real*                          fnorm,
272                            real*                          fmax,
273                            int*                           a_fmax)
274 {
275     double fnorm2, *sum;
276     real   fmax2, fam;
277     int    la_max, a_max, start, end, i, m, gf;
278 
279     /* This routine finds the largest force and returns it.
280      * On parallel machines the global max is taken.
281      */
282     fnorm2 = 0;
283     fmax2  = 0;
284     la_max = -1;
285     start  = 0;
286     end    = mdatoms->homenr;
287     if (mdatoms->cFREEZE)
288     {
289         for (i = start; i < end; i++)
290         {
291             gf  = mdatoms->cFREEZE[i];
292             fam = 0;
293             for (m = 0; m < DIM; m++)
294             {
295                 if (!opts->nFreeze[gf][m])
296                 {
297                     fam += gmx::square(f[i][m]);
298                 }
299             }
300             fnorm2 += fam;
301             if (fam > fmax2)
302             {
303                 fmax2  = fam;
304                 la_max = i;
305             }
306         }
307     }
308     else
309     {
310         for (i = start; i < end; i++)
311         {
312             fam = norm2(f[i]);
313             fnorm2 += fam;
314             if (fam > fmax2)
315             {
316                 fmax2  = fam;
317                 la_max = i;
318             }
319         }
320     }
321 
322     if (la_max >= 0 && DOMAINDECOMP(cr))
323     {
324         a_max = cr->dd->globalAtomIndices[la_max];
325     }
326     else
327     {
328         a_max = la_max;
329     }
330     if (PAR(cr))
331     {
332         snew(sum, 2 * cr->nnodes + 1);
333         sum[2 * cr->nodeid]     = fmax2;
334         sum[2 * cr->nodeid + 1] = a_max;
335         sum[2 * cr->nnodes]     = fnorm2;
336         gmx_sumd(2 * cr->nnodes + 1, sum, cr);
337         fnorm2 = sum[2 * cr->nnodes];
338         /* Determine the global maximum */
339         for (i = 0; i < cr->nnodes; i++)
340         {
341             if (sum[2 * i] > fmax2)
342             {
343                 fmax2 = sum[2 * i];
344                 a_max = gmx::roundToInt(sum[2 * i + 1]);
345             }
346         }
347         sfree(sum);
348     }
349 
350     if (fnorm)
351     {
352         *fnorm = sqrt(fnorm2);
353     }
354     if (fmax)
355     {
356         *fmax = sqrt(fmax2);
357     }
358     if (a_fmax)
359     {
360         *a_fmax = a_max;
361     }
362 }
363 
364 //! Compute the norm of the force
get_state_f_norm_max(const t_commrec * cr,t_grpopts * opts,t_mdatoms * mdatoms,em_state_t * ems)365 static void get_state_f_norm_max(const t_commrec* cr, t_grpopts* opts, t_mdatoms* mdatoms, em_state_t* ems)
366 {
367     get_f_norm_max(cr, opts, mdatoms, ems->f.view().force(), &ems->fnorm, &ems->fmax, &ems->a_fmax);
368 }
369 
370 //! Initialize the energy minimization
init_em(FILE * fplog,const gmx::MDLogger & mdlog,const char * title,const t_commrec * cr,const gmx_multisim_t * ms,t_inputrec * ir,gmx::ImdSession * imdSession,pull_t * pull_work,t_state * state_global,const gmx_mtop_t * top_global,em_state_t * ems,gmx_localtop_t * top,t_nrnb * nrnb,t_forcerec * fr,gmx::MDAtoms * mdAtoms,gmx_global_stat_t * gstat,VirtualSitesHandler * vsite,gmx::Constraints * constr,gmx_shellfc_t ** shellfc)371 static void init_em(FILE*                fplog,
372                     const gmx::MDLogger& mdlog,
373                     const char*          title,
374                     const t_commrec*     cr,
375                     const gmx_multisim_t *ms, /* PLUMED */
376                     t_inputrec*          ir,
377                     gmx::ImdSession*     imdSession,
378                     pull_t*              pull_work,
379                     t_state*             state_global,
380                     const gmx_mtop_t*    top_global,
381                     em_state_t*          ems,
382                     gmx_localtop_t*      top,
383                     t_nrnb*              nrnb,
384                     t_forcerec*          fr,
385                     gmx::MDAtoms*        mdAtoms,
386                     gmx_global_stat_t*   gstat,
387                     VirtualSitesHandler* vsite,
388                     gmx::Constraints*    constr,
389                     gmx_shellfc_t**      shellfc)
390 {
391     real dvdl_constr;
392 
393     if (fplog)
394     {
395         fprintf(fplog, "Initiating %s\n", title);
396     }
397 
398     if (MASTER(cr))
399     {
400         state_global->ngtc = 0;
401     }
402     int*                fep_state = MASTER(cr) ? &state_global->fep_state : nullptr;
403     gmx::ArrayRef<real> lambda    = MASTER(cr) ? state_global->lambda : gmx::ArrayRef<real>();
404     initialize_lambdas(fplog, *ir, MASTER(cr), fep_state, lambda);
405 
406     if (ir->eI == eiNM)
407     {
408         GMX_ASSERT(shellfc != nullptr, "With NM we always support shells");
409 
410         *shellfc =
411                 init_shell_flexcon(stdout, top_global, constr ? constr->numFlexibleConstraints() : 0,
412                                    ir->nstcalcenergy, DOMAINDECOMP(cr), thisRankHasDuty(cr, DUTY_PME));
413     }
414     else
415     {
416         GMX_ASSERT(EI_ENERGY_MINIMIZATION(ir->eI),
417                    "This else currently only handles energy minimizers, consider if your algorithm "
418                    "needs shell/flexible-constraint support");
419 
420         /* With energy minimization, shells and flexible constraints are
421          * automatically minimized when treated like normal DOFS.
422          */
423         if (shellfc != nullptr)
424         {
425             *shellfc = nullptr;
426         }
427     }
428 
429     if (DOMAINDECOMP(cr))
430     {
431         dd_init_local_state(cr->dd, state_global, &ems->s);
432 
433         /* Distribute the charge groups over the nodes from the master node */
434         dd_partition_system(fplog, mdlog, ir->init_step, cr, TRUE, 1, state_global, *top_global, ir,
435                             imdSession, pull_work, &ems->s, &ems->f, mdAtoms, top, fr, vsite,
436                             constr, nrnb, nullptr, FALSE);
437         dd_store_state(cr->dd, &ems->s);
438     }
439     else
440     {
441         state_change_natoms(state_global, state_global->natoms);
442         /* Just copy the state */
443         ems->s = *state_global;
444         state_change_natoms(&ems->s, ems->s.natoms);
445 
446         mdAlgorithmsSetupAtomData(cr, ir, *top_global, top, fr, &ems->f, mdAtoms, constr, vsite,
447                                   shellfc ? *shellfc : nullptr);
448     }
449 
450     update_mdatoms(mdAtoms->mdatoms(), ems->s.lambda[efptMASS]);
451 
452     if (constr)
453     {
454         // TODO how should this cross-module support dependency be managed?
455         if (ir->eConstrAlg == econtSHAKE && gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
456         {
457             gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
458                       econstr_names[econtSHAKE], econstr_names[econtLINCS]);
459         }
460 
461         if (!ir->bContinuation)
462         {
463             /* Constrain the starting coordinates */
464             bool needsLogging  = true;
465             bool computeEnergy = true;
466             bool computeVirial = false;
467             dvdl_constr        = 0;
468             constr->apply(needsLogging, computeEnergy, -1, 0, 1.0, ems->s.x.arrayRefWithPadding(),
469                           ems->s.x.arrayRefWithPadding(), ArrayRef<RVec>(), ems->s.box,
470                           ems->s.lambda[efptFEP], &dvdl_constr, gmx::ArrayRefWithPadding<RVec>(),
471                           computeVirial, nullptr, gmx::ConstraintVariable::Positions);
472         }
473     }
474 
475     if (PAR(cr))
476     {
477         *gstat = global_stat_init(ir);
478     }
479     else
480     {
481         *gstat = nullptr;
482     }
483 
484     calc_shifts(ems->s.box, fr->shift_vec);
485 
486     /* PLUMED */
487     if(plumedswitch){
488       if(ms && ms->numSimulations_>1) {
489         if(MASTER(cr)) plumed_cmd(plumedmain,"GREX setMPIIntercomm",&ms->mastersComm_);
490         if(PAR(cr)){
491           if(DOMAINDECOMP(cr)) {
492             plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->dd->mpi_comm_all);
493           }else{
494             plumed_cmd(plumedmain,"GREX setMPIIntracomm",&cr->mpi_comm_mysim);
495           }
496         }
497         plumed_cmd(plumedmain,"GREX init",nullptr);
498       }
499       if(PAR(cr)){
500         if(DOMAINDECOMP(cr)) {
501           plumed_cmd(plumedmain,"setMPIComm",&cr->dd->mpi_comm_all);
502         }else{
503           plumed_cmd(plumedmain,"setMPIComm",&cr->mpi_comm_mysim);
504         }
505       }
506       plumed_cmd(plumedmain,"setNatoms",&top_global->natoms);
507       plumed_cmd(plumedmain,"setMDEngine","gromacs");
508       plumed_cmd(plumedmain,"setLog",fplog);
509       real real_delta_t;
510       real_delta_t=ir->delta_t;
511       plumed_cmd(plumedmain,"setTimestep",&real_delta_t);
512       plumed_cmd(plumedmain,"init",nullptr);
513 
514       if(PAR(cr)){
515         if(DOMAINDECOMP(cr)) {
516           int nat_home = dd_numHomeAtoms(*cr->dd);
517           plumed_cmd(plumedmain,"setAtomsNlocal",&nat_home);
518           plumed_cmd(plumedmain,"setAtomsGatindex",cr->dd->globalAtomIndices.data());
519 
520         }
521       }
522     }
523     /* END PLUMED */
524 }
525 
526 //! Finalize the minimization
finish_em(const t_commrec * cr,gmx_mdoutf_t outf,gmx_walltime_accounting_t walltime_accounting,gmx_wallcycle_t wcycle)527 static void finish_em(const t_commrec*          cr,
528                       gmx_mdoutf_t              outf,
529                       gmx_walltime_accounting_t walltime_accounting,
530                       gmx_wallcycle_t           wcycle)
531 {
532     if (!thisRankHasDuty(cr, DUTY_PME))
533     {
534         /* Tell the PME only node to finish */
535         gmx_pme_send_finish(cr);
536     }
537 
538     done_mdoutf(outf);
539 
540     em_time_end(walltime_accounting, wcycle);
541 }
542 
543 //! Swap two different EM states during minimization
swap_em_state(em_state_t ** ems1,em_state_t ** ems2)544 static void swap_em_state(em_state_t** ems1, em_state_t** ems2)
545 {
546     em_state_t* tmp;
547 
548     tmp   = *ems1;
549     *ems1 = *ems2;
550     *ems2 = tmp;
551 }
552 
553 //! Save the EM trajectory
write_em_traj(FILE * fplog,const t_commrec * cr,gmx_mdoutf_t outf,gmx_bool bX,gmx_bool bF,const char * confout,const gmx_mtop_t * top_global,t_inputrec * ir,int64_t step,em_state_t * state,t_state * state_global,ObservablesHistory * observablesHistory)554 static void write_em_traj(FILE*               fplog,
555                           const t_commrec*    cr,
556                           gmx_mdoutf_t        outf,
557                           gmx_bool            bX,
558                           gmx_bool            bF,
559                           const char*         confout,
560                           const gmx_mtop_t*   top_global,
561                           t_inputrec*         ir,
562                           int64_t             step,
563                           em_state_t*         state,
564                           t_state*            state_global,
565                           ObservablesHistory* observablesHistory)
566 {
567     int mdof_flags = 0;
568 
569     if (bX)
570     {
571         mdof_flags |= MDOF_X;
572     }
573     if (bF)
574     {
575         mdof_flags |= MDOF_F;
576     }
577 
578     /* If we want IMD output, set appropriate MDOF flag */
579     if (ir->bIMD)
580     {
581         mdof_flags |= MDOF_IMD;
582     }
583 
584     gmx::WriteCheckpointDataHolder checkpointDataHolder;
585     mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step,
586                                      static_cast<double>(step), &state->s, state_global,
587                                      observablesHistory, state->f.view().force(), &checkpointDataHolder);
588 
589     if (confout != nullptr)
590     {
591         if (DOMAINDECOMP(cr))
592         {
593             /* If bX=true, x was collected to state_global in the call above */
594             if (!bX)
595             {
596                 auto globalXRef = MASTER(cr) ? state_global->x : gmx::ArrayRef<gmx::RVec>();
597                 dd_collect_vec(cr->dd, state->s.ddp_count, state->s.ddp_count_cg_gl, state->s.cg_gl,
598                                state->s.x, globalXRef);
599             }
600         }
601         else
602         {
603             /* Copy the local state pointer */
604             state_global = &state->s;
605         }
606 
607         if (MASTER(cr))
608         {
609             if (ir->pbcType != PbcType::No && !ir->bPeriodicMols && DOMAINDECOMP(cr))
610             {
611                 /* Make molecules whole only for confout writing */
612                 do_pbc_mtop(ir->pbcType, state->s.box, top_global, state_global->x.rvec_array());
613             }
614 
615             write_sto_conf_mtop(confout, *top_global->name, top_global,
616                                 state_global->x.rvec_array(), nullptr, ir->pbcType, state->s.box);
617         }
618     }
619 }
620 
621 //! \brief Do one minimization step
622 //
623 // \returns true when the step succeeded, false when a constraint error occurred
do_em_step(const t_commrec * cr,t_inputrec * ir,t_mdatoms * md,em_state_t * ems1,real a,gmx::ArrayRefWithPadding<const gmx::RVec> force,em_state_t * ems2,gmx::Constraints * constr,int64_t count)624 static bool do_em_step(const t_commrec*                          cr,
625                        t_inputrec*                               ir,
626                        t_mdatoms*                                md,
627                        em_state_t*                               ems1,
628                        real                                      a,
629                        gmx::ArrayRefWithPadding<const gmx::RVec> force,
630                        em_state_t*                               ems2,
631                        gmx::Constraints*                         constr,
632                        int64_t                                   count)
633 
634 {
635     t_state *s1, *s2;
636     int      start, end;
637     real     dvdl_constr;
638     int nthreads gmx_unused;
639 
640     bool validStep = true;
641 
642     s1 = &ems1->s;
643     s2 = &ems2->s;
644 
645     if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
646     {
647         gmx_incons("state mismatch in do_em_step");
648     }
649 
650     s2->flags = s1->flags;
651 
652     if (s2->natoms != s1->natoms)
653     {
654         state_change_natoms(s2, s1->natoms);
655         ems2->f.resize(s2->natoms);
656     }
657     if (DOMAINDECOMP(cr) && s2->cg_gl.size() != s1->cg_gl.size())
658     {
659         s2->cg_gl.resize(s1->cg_gl.size());
660     }
661 
662     copy_mat(s1->box, s2->box);
663     /* Copy free energy state */
664     s2->lambda = s1->lambda;
665     copy_mat(s1->box, s2->box);
666 
667     start = 0;
668     end   = md->homenr;
669 
670     nthreads = gmx_omp_nthreads_get(emntUpdate);
671 #pragma omp parallel num_threads(nthreads)
672     {
673         const rvec* x1 = s1->x.rvec_array();
674         rvec*       x2 = s2->x.rvec_array();
675         const rvec* f  = as_rvec_array(force.unpaddedArrayRef().data());
676 
677         int gf = 0;
678 #pragma omp for schedule(static) nowait
679         for (int i = start; i < end; i++)
680         {
681             try
682             {
683                 if (md->cFREEZE)
684                 {
685                     gf = md->cFREEZE[i];
686                 }
687                 for (int m = 0; m < DIM; m++)
688                 {
689                     if (ir->opts.nFreeze[gf][m])
690                     {
691                         x2[i][m] = x1[i][m];
692                     }
693                     else
694                     {
695                         x2[i][m] = x1[i][m] + a * f[i][m];
696                     }
697                 }
698             }
699             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
700         }
701 
702         if (s2->flags & (1 << estCGP))
703         {
704             /* Copy the CG p vector */
705             const rvec* p1 = s1->cg_p.rvec_array();
706             rvec*       p2 = s2->cg_p.rvec_array();
707 #pragma omp for schedule(static) nowait
708             for (int i = start; i < end; i++)
709             {
710                 // Trivial OpenMP block that does not throw
711                 copy_rvec(p1[i], p2[i]);
712             }
713         }
714 
715         if (DOMAINDECOMP(cr))
716         {
717             /* OpenMP does not supported unsigned loop variables */
718 #pragma omp for schedule(static) nowait
719             for (gmx::index i = 0; i < gmx::ssize(s2->cg_gl); i++)
720             {
721                 s2->cg_gl[i] = s1->cg_gl[i];
722             }
723         }
724     }
725 
726     if (DOMAINDECOMP(cr))
727     {
728         s2->ddp_count       = s1->ddp_count;
729         s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
730     }
731 
732     if (constr)
733     {
734         dvdl_constr = 0;
735         validStep   = constr->apply(
736                 TRUE, TRUE, count, 0, 1.0, s1->x.arrayRefWithPadding(), s2->x.arrayRefWithPadding(),
737                 ArrayRef<RVec>(), s2->box, s2->lambda[efptBONDED], &dvdl_constr,
738                 gmx::ArrayRefWithPadding<RVec>(), false, nullptr, gmx::ConstraintVariable::Positions);
739 
740         if (cr->nnodes > 1)
741         {
742             /* This global reduction will affect performance at high
743              * parallelization, but we can not really avoid it.
744              * But usually EM is not run at high parallelization.
745              */
746             int reductionBuffer = static_cast<int>(!validStep);
747             gmx_sumi(1, &reductionBuffer, cr);
748             validStep = (reductionBuffer == 0);
749         }
750 
751         // We should move this check to the different minimizers
752         if (!validStep && ir->eI != eiSteep)
753         {
754             gmx_fatal(FARGS,
755                       "The coordinates could not be constrained. Minimizer '%s' can not handle "
756                       "constraint failures, use minimizer '%s' before using '%s'.",
757                       EI(ir->eI), EI(eiSteep), EI(ir->eI));
758         }
759     }
760 
761     return validStep;
762 }
763 
764 //! Prepare EM for using domain decomposition parallellization
em_dd_partition_system(FILE * fplog,const gmx::MDLogger & mdlog,int step,const t_commrec * cr,const gmx_mtop_t * top_global,t_inputrec * ir,gmx::ImdSession * imdSession,pull_t * pull_work,em_state_t * ems,gmx_localtop_t * top,gmx::MDAtoms * mdAtoms,t_forcerec * fr,VirtualSitesHandler * vsite,gmx::Constraints * constr,t_nrnb * nrnb,gmx_wallcycle_t wcycle)765 static void em_dd_partition_system(FILE*                fplog,
766                                    const gmx::MDLogger& mdlog,
767                                    int                  step,
768                                    const t_commrec*     cr,
769                                    const gmx_mtop_t*    top_global,
770                                    t_inputrec*          ir,
771                                    gmx::ImdSession*     imdSession,
772                                    pull_t*              pull_work,
773                                    em_state_t*          ems,
774                                    gmx_localtop_t*      top,
775                                    gmx::MDAtoms*        mdAtoms,
776                                    t_forcerec*          fr,
777                                    VirtualSitesHandler* vsite,
778                                    gmx::Constraints*    constr,
779                                    t_nrnb*              nrnb,
780                                    gmx_wallcycle_t      wcycle)
781 {
782     /* Repartition the domain decomposition */
783     dd_partition_system(fplog, mdlog, step, cr, FALSE, 1, nullptr, *top_global, ir, imdSession, pull_work,
784                         &ems->s, &ems->f, mdAtoms, top, fr, vsite, constr, nrnb, wcycle, FALSE);
785     dd_store_state(cr->dd, &ems->s);
786 }
787 
788 namespace
789 {
790 
791 /*! \brief Class to handle the work of setting and doing an energy evaluation.
792  *
793  * This class is a mere aggregate of parameters to pass to evaluate an
794  * energy, so that future changes to names and types of them consume
795  * less time when refactoring other code.
796  *
797  * Aggregate initialization is used, for which the chief risk is that
798  * if a member is added at the end and not all initializer lists are
799  * updated, then the member will be value initialized, which will
800  * typically mean initialization to zero.
801  *
802  * Use a braced initializer list to construct one of these. */
803 class EnergyEvaluator
804 {
805 public:
806     /*! \brief Evaluates an energy on the state in \c ems.
807      *
808      * \todo In practice, the same objects mu_tot, vir, and pres
809      * are always passed to this function, so we would rather have
810      * them as data members. However, their C-array types are
811      * unsuited for aggregate initialization. When the types
812      * improve, the call signature of this method can be reduced.
813      */
814     void run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst);
815     //! Handles logging (deprecated).
816     FILE* fplog;
817     //! Handles logging.
818     const gmx::MDLogger& mdlog;
819     //! Handles communication.
820     const t_commrec* cr;
821     //! Coordinates multi-simulations.
822     const gmx_multisim_t* ms;
823     //! Holds the simulation topology.
824     const gmx_mtop_t* top_global;
825     //! Holds the domain topology.
826     gmx_localtop_t* top;
827     //! User input options.
828     t_inputrec* inputrec;
829     //! The Interactive Molecular Dynamics session.
830     gmx::ImdSession* imdSession;
831     //! The pull work object.
832     pull_t* pull_work;
833     //! Manages flop accounting.
834     t_nrnb* nrnb;
835     //! Manages wall cycle accounting.
836     gmx_wallcycle_t wcycle;
837     //! Coordinates global reduction.
838     gmx_global_stat_t gstat;
839     //! Handles virtual sites.
840     VirtualSitesHandler* vsite;
841     //! Handles constraints.
842     gmx::Constraints* constr;
843     //! Per-atom data for this domain.
844     gmx::MDAtoms* mdAtoms;
845     //! Handles how to calculate the forces.
846     t_forcerec* fr;
847     //! Schedule of force-calculation work each step for this task.
848     MdrunScheduleWorkload* runScheduleWork;
849     //! Stores the computed energies.
850     gmx_enerdata_t* enerd;
851 };
852 
run(em_state_t * ems,rvec mu_tot,tensor vir,tensor pres,int64_t count,gmx_bool bFirst)853 void EnergyEvaluator::run(em_state_t* ems, rvec mu_tot, tensor vir, tensor pres, int64_t count, gmx_bool bFirst)
854 {
855     real     t;
856     gmx_bool bNS;
857     tensor   force_vir, shake_vir, ekin;
858     real     dvdl_constr;
859     real     terminate = 0;
860 
861     /* Set the time to the initial time, the time does not change during EM */
862     t = inputrec->init_t;
863 
864     if (bFirst || (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
865     {
866         /* This is the first state or an old state used before the last ns */
867         bNS = TRUE;
868     }
869     else
870     {
871         bNS = FALSE;
872         if (inputrec->nstlist > 0)
873         {
874             bNS = TRUE;
875         }
876     }
877 
878     if (vsite)
879     {
880         vsite->construct(ems->s.x, 1, {}, ems->s.box);
881     }
882 
883     if (DOMAINDECOMP(cr) && bNS)
884     {
885         /* Repartition the domain decomposition */
886         em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession, pull_work,
887                                ems, top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
888     }
889 
890     /* Calc force & energy on new trial position  */
891     /* do_force always puts the charge groups in the box and shifts again
892      * We do not unshift, so molecules are always whole in congrad.c
893      */
894     /* PLUMED */
895     int plumedNeedsEnergy=0;
896     matrix plumed_vir;
897     if(plumedswitch){
898       long int lstep=count; plumed_cmd(plumedmain,"setStepLong",&lstep);
899       plumed_cmd(plumedmain,"setPositions",&ems->s.x[0][0]);
900       plumed_cmd(plumedmain,"setMasses",&mdAtoms->mdatoms()->massT[0]);
901       plumed_cmd(plumedmain,"setCharges",&mdAtoms->mdatoms()->chargeA[0]);
902       plumed_cmd(plumedmain,"setBox",&ems->s.box[0][0]);
903       plumed_cmd(plumedmain,"prepareCalc",nullptr);
904       plumed_cmd(plumedmain,"setForces",&ems->f.view().force()[0][0]);
905       plumed_cmd(plumedmain,"isEnergyNeeded",&plumedNeedsEnergy);
906       clear_mat(plumed_vir);
907       plumed_cmd(plumedmain,"setVirial",&plumed_vir[0][0]);
908     }
909     /* END PLUMED */
910 
911     do_force(fplog, cr, ms, inputrec, nullptr, nullptr, imdSession, pull_work, count, nrnb, wcycle,
912              top, ems->s.box, ems->s.x.arrayRefWithPadding(), &ems->s.hist, &ems->f.view(), force_vir,
913              mdAtoms->mdatoms(), enerd, ems->s.lambda, fr, runScheduleWork, vsite, mu_tot, t, nullptr,
914              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY
915                      | (bNS ? GMX_FORCE_NS : 0),
916              DDBalanceRegionHandler(cr));
917 
918     /* PLUMED */
919     if(plumedswitch){
920       if(plumedNeedsEnergy) {
921         msmul(force_vir,2.0,plumed_vir);
922         plumed_cmd(plumedmain,"setEnergy",&enerd->term[F_EPOT]);
923         plumed_cmd(plumedmain,"performCalc",nullptr);
924         msmul(plumed_vir,0.5,force_vir);
925       } else {
926         msmul(plumed_vir,0.5,plumed_vir);
927         m_add(force_vir,plumed_vir,force_vir);
928       }
929     }
930     /* END PLUMED */
931 
932     /* Clear the unused shake virial and pressure */
933     clear_mat(shake_vir);
934     clear_mat(pres);
935 
936     /* Communicate stuff when parallel */
937     if (PAR(cr) && inputrec->eI != eiNM)
938     {
939         wallcycle_start(wcycle, ewcMoveE);
940 
941         global_stat(gstat, cr, enerd, force_vir, shake_vir, inputrec, nullptr, nullptr, nullptr, 1,
942                     &terminate, nullptr, FALSE, CGLO_ENERGY | CGLO_PRESSURE | CGLO_CONSTRAINT);
943 
944         wallcycle_stop(wcycle, ewcMoveE);
945     }
946 
947     if (fr->dispersionCorrection)
948     {
949         /* Calculate long range corrections to pressure and energy */
950         const DispersionCorrection::Correction correction =
951                 fr->dispersionCorrection->calculate(ems->s.box, ems->s.lambda[efptVDW]);
952 
953         enerd->term[F_DISPCORR] = correction.energy;
954         enerd->term[F_EPOT] += correction.energy;
955         enerd->term[F_PRES] += correction.pressure;
956         enerd->term[F_DVDL] += correction.dvdl;
957     }
958     else
959     {
960         enerd->term[F_DISPCORR] = 0;
961     }
962 
963     ems->epot = enerd->term[F_EPOT];
964 
965     if (constr)
966     {
967         /* Project out the constraint components of the force */
968         bool needsLogging  = false;
969         bool computeEnergy = false;
970         bool computeVirial = true;
971         dvdl_constr        = 0;
972         auto f             = ems->f.view().forceWithPadding();
973         constr->apply(needsLogging, computeEnergy, count, 0, 1.0, ems->s.x.arrayRefWithPadding(), f,
974                       f.unpaddedArrayRef(), ems->s.box, ems->s.lambda[efptBONDED], &dvdl_constr,
975                       gmx::ArrayRefWithPadding<RVec>(), computeVirial, shake_vir,
976                       gmx::ConstraintVariable::ForceDispl);
977         enerd->term[F_DVDL_CONSTR] += dvdl_constr;
978         m_add(force_vir, shake_vir, vir);
979     }
980     else
981     {
982         copy_mat(force_vir, vir);
983     }
984 
985     clear_mat(ekin);
986     enerd->term[F_PRES] = calc_pres(fr->pbcType, inputrec->nwall, ems->s.box, ekin, vir, pres);
987 
988     if (inputrec->efep != efepNO)
989     {
990         accumulateKineticLambdaComponents(enerd, ems->s.lambda, *inputrec->fepvals);
991     }
992 
993     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
994     {
995         get_state_f_norm_max(cr, &(inputrec->opts), mdAtoms->mdatoms(), ems);
996     }
997 }
998 
999 } // namespace
1000 
1001 //! Parallel utility summing energies and forces
reorder_partsum(const t_commrec * cr,t_grpopts * opts,const gmx_mtop_t * top_global,const em_state_t * s_min,const em_state_t * s_b)1002 static double reorder_partsum(const t_commrec*  cr,
1003                               t_grpopts*        opts,
1004                               const gmx_mtop_t* top_global,
1005                               const em_state_t* s_min,
1006                               const em_state_t* s_b)
1007 {
1008     if (debug)
1009     {
1010         fprintf(debug, "Doing reorder_partsum\n");
1011     }
1012 
1013     auto fm = s_min->f.view().force();
1014     auto fb = s_b->f.view().force();
1015 
1016     /* Collect fm in a global vector fmg.
1017      * This conflicts with the spirit of domain decomposition,
1018      * but to fully optimize this a much more complicated algorithm is required.
1019      */
1020     const int natoms = top_global->natoms;
1021     rvec*     fmg;
1022     snew(fmg, natoms);
1023 
1024     gmx::ArrayRef<const int> indicesMin = s_min->s.cg_gl;
1025     int                      i          = 0;
1026     for (int a : indicesMin)
1027     {
1028         copy_rvec(fm[i], fmg[a]);
1029         i++;
1030     }
1031     gmx_sum(top_global->natoms * 3, fmg[0], cr);
1032 
1033     /* Now we will determine the part of the sum for the cgs in state s_b */
1034     gmx::ArrayRef<const int> indicesB = s_b->s.cg_gl;
1035 
1036     double partsum                        = 0;
1037     i                                     = 0;
1038     int                                gf = 0;
1039     gmx::ArrayRef<const unsigned char> grpnrFREEZE =
1040             top_global->groups.groupNumbers[SimulationAtomGroupType::Freeze];
1041     for (int a : indicesB)
1042     {
1043         if (!grpnrFREEZE.empty())
1044         {
1045             gf = grpnrFREEZE[i];
1046         }
1047         for (int m = 0; m < DIM; m++)
1048         {
1049             if (!opts->nFreeze[gf][m])
1050             {
1051                 partsum += (fb[i][m] - fmg[a][m]) * fb[i][m];
1052             }
1053         }
1054         i++;
1055     }
1056 
1057     sfree(fmg);
1058 
1059     return partsum;
1060 }
1061 
1062 //! Print some stuff, like beta, whatever that means.
pr_beta(const t_commrec * cr,t_grpopts * opts,t_mdatoms * mdatoms,const gmx_mtop_t * top_global,const em_state_t * s_min,const em_state_t * s_b)1063 static real pr_beta(const t_commrec*  cr,
1064                     t_grpopts*        opts,
1065                     t_mdatoms*        mdatoms,
1066                     const gmx_mtop_t* top_global,
1067                     const em_state_t* s_min,
1068                     const em_state_t* s_b)
1069 {
1070     double sum;
1071 
1072     /* This is just the classical Polak-Ribiere calculation of beta;
1073      * it looks a bit complicated since we take freeze groups into account,
1074      * and might have to sum it in parallel runs.
1075      */
1076 
1077     if (!DOMAINDECOMP(cr)
1078         || (s_min->s.ddp_count == cr->dd->ddp_count && s_b->s.ddp_count == cr->dd->ddp_count))
1079     {
1080         auto fm = s_min->f.view().force();
1081         auto fb = s_b->f.view().force();
1082         sum     = 0;
1083         int gf  = 0;
1084         /* This part of code can be incorrect with DD,
1085          * since the atom ordering in s_b and s_min might differ.
1086          */
1087         for (int i = 0; i < mdatoms->homenr; i++)
1088         {
1089             if (mdatoms->cFREEZE)
1090             {
1091                 gf = mdatoms->cFREEZE[i];
1092             }
1093             for (int m = 0; m < DIM; m++)
1094             {
1095                 if (!opts->nFreeze[gf][m])
1096                 {
1097                     sum += (fb[i][m] - fm[i][m]) * fb[i][m];
1098                 }
1099             }
1100         }
1101     }
1102     else
1103     {
1104         /* We need to reorder cgs while summing */
1105         sum = reorder_partsum(cr, opts, top_global, s_min, s_b);
1106     }
1107     if (PAR(cr))
1108     {
1109         gmx_sumd(1, &sum, cr);
1110     }
1111 
1112     return sum / gmx::square(s_min->fnorm);
1113 }
1114 
1115 namespace gmx
1116 {
1117 
do_cg()1118 void LegacySimulator::do_cg()
1119 {
1120     const char* CG = "Polak-Ribiere Conjugate Gradients";
1121 
1122     gmx_localtop_t    top(top_global->ffparams);
1123     gmx_global_stat_t gstat;
1124     double            tmp, minstep;
1125     real              stepsize;
1126     real              a, b, c, beta = 0.0;
1127     real              epot_repl = 0;
1128     real              pnorm;
1129     gmx_bool          converged, foundlower;
1130     rvec              mu_tot = { 0 };
1131     gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
1132     tensor            vir, pres;
1133     int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
1134     int               m, step, nminstep;
1135     auto              mdatoms = mdAtoms->mdatoms();
1136 
1137     GMX_LOG(mdlog.info)
1138             .asParagraph()
1139             .appendText(
1140                     "Note that activating conjugate gradient energy minimization via the "
1141                     "integrator .mdp option and the command gmx mdrun may "
1142                     "be available in a different form in a future version of GROMACS, "
1143                     "e.g. gmx minimize and an .mdp option.");
1144 
1145     step = 0;
1146 
1147     if (MASTER(cr))
1148     {
1149         // In CG, the state is extended with a search direction
1150         state_global->flags |= (1 << estCGP);
1151 
1152         // Ensure the extra per-atom state array gets allocated
1153         state_change_natoms(state_global, state_global->natoms);
1154 
1155         // Initialize the search direction to zero
1156         for (RVec& cg_p : state_global->cg_p)
1157         {
1158             cg_p = { 0, 0, 0 };
1159         }
1160     }
1161 
1162     /* Create 4 states on the stack and extract pointers that we will swap */
1163     em_state_t  s0{}, s1{}, s2{}, s3{};
1164     em_state_t* s_min = &s0;
1165     em_state_t* s_a   = &s1;
1166     em_state_t* s_b   = &s2;
1167     em_state_t* s_c   = &s3;
1168 
1169     /* Init em and store the local state in s_min */
1170     init_em(fplog, mdlog, CG, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global, s_min,
1171             &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, nullptr);
1172     const bool        simulationsShareState = false;
1173     gmx_mdoutf*       outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
1174                                    mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
1175                                    StartingBehavior::NewSimulation, simulationsShareState, ms);
1176     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work,
1177                                    nullptr, false, StartingBehavior::NewSimulation,
1178                                    simulationsShareState, mdModulesNotifier);
1179 
1180     /* Print to log file */
1181     print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
1182 
1183     /* Max number of steps */
1184     number_steps = inputrec->nsteps;
1185 
1186     if (MASTER(cr))
1187     {
1188         sp_header(stderr, CG, inputrec->em_tol, number_steps);
1189     }
1190     if (fplog)
1191     {
1192         sp_header(fplog, CG, inputrec->em_tol, number_steps);
1193     }
1194 
1195     EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
1196                                      inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
1197                                      vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
1198     /* Call the force routine and some auxiliary (neighboursearching etc.) */
1199     /* do_force always puts the charge groups in the box and shifts again
1200      * We do not unshift, so molecules are always whole in congrad.c
1201      */
1202     energyEvaluator.run(s_min, mu_tot, vir, pres, -1, TRUE);
1203 
1204     if (MASTER(cr))
1205     {
1206         /* Copy stuff to the energy bin for easy printing etc. */
1207         matrix nullBox = {};
1208         energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
1209                                          enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0,
1210                                          nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
1211 
1212         EnergyOutput::printHeader(fplog, step, step);
1213         energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step,
1214                                            step, fr->fcdata.get(), nullptr);
1215     }
1216 
1217     /* Estimate/guess the initial stepsize */
1218     stepsize = inputrec->em_stepsize / s_min->fnorm;
1219 
1220     if (MASTER(cr))
1221     {
1222         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1223         fprintf(stderr, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
1224         fprintf(stderr, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
1225         fprintf(stderr, "\n");
1226         /* and copy to the log file too... */
1227         fprintf(fplog, "   F-max             = %12.5e on atom %d\n", s_min->fmax, s_min->a_fmax + 1);
1228         fprintf(fplog, "   F-Norm            = %12.5e\n", s_min->fnorm / sqrtNumAtoms);
1229         fprintf(fplog, "\n");
1230     }
1231     /* Start the loop over CG steps.
1232      * Each successful step is counted, and we continue until
1233      * we either converge or reach the max number of steps.
1234      */
1235     converged = FALSE;
1236     for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
1237     {
1238 
1239         /* start taking steps in a new direction
1240          * First time we enter the routine, beta=0, and the direction is
1241          * simply the negative gradient.
1242          */
1243 
1244         /* Calculate the new direction in p, and the gradient in this direction, gpa */
1245         gmx::ArrayRef<gmx::RVec>       pm  = s_min->s.cg_p;
1246         gmx::ArrayRef<const gmx::RVec> sfm = s_min->f.view().force();
1247         double                         gpa = 0;
1248         int                            gf  = 0;
1249         for (int i = 0; i < mdatoms->homenr; i++)
1250         {
1251             if (mdatoms->cFREEZE)
1252             {
1253                 gf = mdatoms->cFREEZE[i];
1254             }
1255             for (m = 0; m < DIM; m++)
1256             {
1257                 if (!inputrec->opts.nFreeze[gf][m])
1258                 {
1259                     pm[i][m] = sfm[i][m] + beta * pm[i][m];
1260                     gpa -= pm[i][m] * sfm[i][m];
1261                     /* f is negative gradient, thus the sign */
1262                 }
1263                 else
1264                 {
1265                     pm[i][m] = 0;
1266                 }
1267             }
1268         }
1269 
1270         /* Sum the gradient along the line across CPUs */
1271         if (PAR(cr))
1272         {
1273             gmx_sumd(1, &gpa, cr);
1274         }
1275 
1276         /* Calculate the norm of the search vector */
1277         get_f_norm_max(cr, &(inputrec->opts), mdatoms, pm, &pnorm, nullptr, nullptr);
1278 
1279         /* Just in case stepsize reaches zero due to numerical precision... */
1280         if (stepsize <= 0)
1281         {
1282             stepsize = inputrec->em_stepsize / pnorm;
1283         }
1284 
1285         /*
1286          * Double check the value of the derivative in the search direction.
1287          * If it is positive it must be due to the old information in the
1288          * CG formula, so just remove that and start over with beta=0.
1289          * This corresponds to a steepest descent step.
1290          */
1291         if (gpa > 0)
1292         {
1293             beta = 0;
1294             step--;   /* Don't count this step since we are restarting */
1295             continue; /* Go back to the beginning of the big for-loop */
1296         }
1297 
1298         /* Calculate minimum allowed stepsize, before the average (norm)
1299          * relative change in coordinate is smaller than precision
1300          */
1301         minstep      = 0;
1302         auto s_min_x = makeArrayRef(s_min->s.x);
1303         for (int i = 0; i < mdatoms->homenr; i++)
1304         {
1305             for (m = 0; m < DIM; m++)
1306             {
1307                 tmp = fabs(s_min_x[i][m]);
1308                 if (tmp < 1.0)
1309                 {
1310                     tmp = 1.0;
1311                 }
1312                 tmp = pm[i][m] / tmp;
1313                 minstep += tmp * tmp;
1314             }
1315         }
1316         /* Add up from all CPUs */
1317         if (PAR(cr))
1318         {
1319             gmx_sumd(1, &minstep, cr);
1320         }
1321 
1322         minstep = GMX_REAL_EPS / sqrt(minstep / (3 * top_global->natoms));
1323 
1324         if (stepsize < minstep)
1325         {
1326             converged = TRUE;
1327             break;
1328         }
1329 
1330         /* Write coordinates if necessary */
1331         do_x = do_per_step(step, inputrec->nstxout);
1332         do_f = do_per_step(step, inputrec->nstfout);
1333 
1334         write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, step, s_min,
1335                       state_global, observablesHistory);
1336 
1337         /* Take a step downhill.
1338          * In theory, we should minimize the function along this direction.
1339          * That is quite possible, but it turns out to take 5-10 function evaluations
1340          * for each line. However, we dont really need to find the exact minimum -
1341          * it is much better to start a new CG step in a modified direction as soon
1342          * as we are close to it. This will save a lot of energy evaluations.
1343          *
1344          * In practice, we just try to take a single step.
1345          * If it worked (i.e. lowered the energy), we increase the stepsize but
1346          * the continue straight to the next CG step without trying to find any minimum.
1347          * If it didn't work (higher energy), there must be a minimum somewhere between
1348          * the old position and the new one.
1349          *
1350          * Due to the finite numerical accuracy, it turns out that it is a good idea
1351          * to even accept a SMALL increase in energy, if the derivative is still downhill.
1352          * This leads to lower final energies in the tests I've done. / Erik
1353          */
1354         s_a->epot = s_min->epot;
1355         a         = 0.0;
1356         c         = a + stepsize; /* reference position along line is zero */
1357 
1358         if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
1359         {
1360             em_dd_partition_system(fplog, mdlog, step, cr, top_global, inputrec, imdSession,
1361                                    pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
1362         }
1363 
1364         /* Take a trial step (new coords in s_c) */
1365         do_em_step(cr, inputrec, mdatoms, s_min, c, s_min->s.cg_p.constArrayRefWithPadding(), s_c,
1366                    constr, -1);
1367 
1368         neval++;
1369         /* Calculate energy for the trial step */
1370         energyEvaluator.run(s_c, mu_tot, vir, pres, -1, FALSE);
1371 
1372         /* Calc derivative along line */
1373         const rvec*                    pc  = s_c->s.cg_p.rvec_array();
1374         gmx::ArrayRef<const gmx::RVec> sfc = s_c->f.view().force();
1375         double                         gpc = 0;
1376         for (int i = 0; i < mdatoms->homenr; i++)
1377         {
1378             for (m = 0; m < DIM; m++)
1379             {
1380                 gpc -= pc[i][m] * sfc[i][m]; /* f is negative gradient, thus the sign */
1381             }
1382         }
1383         /* Sum the gradient along the line across CPUs */
1384         if (PAR(cr))
1385         {
1386             gmx_sumd(1, &gpc, cr);
1387         }
1388 
1389         /* This is the max amount of increase in energy we tolerate */
1390         tmp = std::sqrt(GMX_REAL_EPS) * fabs(s_a->epot);
1391 
1392         /* Accept the step if the energy is lower, or if it is not significantly higher
1393          * and the line derivative is still negative.
1394          */
1395         if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
1396         {
1397             foundlower = TRUE;
1398             /* Great, we found a better energy. Increase step for next iteration
1399              * if we are still going down, decrease it otherwise
1400              */
1401             if (gpc < 0)
1402             {
1403                 stepsize *= 1.618034; /* The golden section */
1404             }
1405             else
1406             {
1407                 stepsize *= 0.618034; /* 1/golden section */
1408             }
1409         }
1410         else
1411         {
1412             /* New energy is the same or higher. We will have to do some work
1413              * to find a smaller value in the interval. Take smaller step next time!
1414              */
1415             foundlower = FALSE;
1416             stepsize *= 0.618034;
1417         }
1418 
1419 
1420         /* OK, if we didn't find a lower value we will have to locate one now - there must
1421          * be one in the interval [a=0,c].
1422          * The same thing is valid here, though: Don't spend dozens of iterations to find
1423          * the line minimum. We try to interpolate based on the derivative at the endpoints,
1424          * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1425          *
1426          * I also have a safeguard for potentially really pathological functions so we never
1427          * take more than 20 steps before we give up ...
1428          *
1429          * If we already found a lower value we just skip this step and continue to the update.
1430          */
1431         double gpb;
1432         if (!foundlower)
1433         {
1434             nminstep = 0;
1435 
1436             do
1437             {
1438                 /* Select a new trial point.
1439                  * If the derivatives at points a & c have different sign we interpolate to zero,
1440                  * otherwise just do a bisection.
1441                  */
1442                 if (gpa < 0 && gpc > 0)
1443                 {
1444                     b = a + gpa * (a - c) / (gpc - gpa);
1445                 }
1446                 else
1447                 {
1448                     b = 0.5 * (a + c);
1449                 }
1450 
1451                 /* safeguard if interpolation close to machine accuracy causes errors:
1452                  * never go outside the interval
1453                  */
1454                 if (b <= a || b >= c)
1455                 {
1456                     b = 0.5 * (a + c);
1457                 }
1458 
1459                 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
1460                 {
1461                     /* Reload the old state */
1462                     em_dd_partition_system(fplog, mdlog, -1, cr, top_global, inputrec, imdSession, pull_work,
1463                                            s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
1464                 }
1465 
1466                 /* Take a trial step to this new point - new coords in s_b */
1467                 do_em_step(cr, inputrec, mdatoms, s_min, b,
1468                            s_min->s.cg_p.constArrayRefWithPadding(), s_b, constr, -1);
1469 
1470                 neval++;
1471                 /* Calculate energy for the trial step */
1472                 energyEvaluator.run(s_b, mu_tot, vir, pres, -1, FALSE);
1473 
1474                 /* p does not change within a step, but since the domain decomposition
1475                  * might change, we have to use cg_p of s_b here.
1476                  */
1477                 const rvec*                    pb  = s_b->s.cg_p.rvec_array();
1478                 gmx::ArrayRef<const gmx::RVec> sfb = s_b->f.view().force();
1479                 gpb                                = 0;
1480                 for (int i = 0; i < mdatoms->homenr; i++)
1481                 {
1482                     for (m = 0; m < DIM; m++)
1483                     {
1484                         gpb -= pb[i][m] * sfb[i][m]; /* f is negative gradient, thus the sign */
1485                     }
1486                 }
1487                 /* Sum the gradient along the line across CPUs */
1488                 if (PAR(cr))
1489                 {
1490                     gmx_sumd(1, &gpb, cr);
1491                 }
1492 
1493                 if (debug)
1494                 {
1495                     fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n", s_a->epot, s_b->epot,
1496                             s_c->epot, gpb);
1497                 }
1498 
1499                 epot_repl = s_b->epot;
1500 
1501                 /* Keep one of the intervals based on the value of the derivative at the new point */
1502                 if (gpb > 0)
1503                 {
1504                     /* Replace c endpoint with b */
1505                     swap_em_state(&s_b, &s_c);
1506                     c   = b;
1507                     gpc = gpb;
1508                 }
1509                 else
1510                 {
1511                     /* Replace a endpoint with b */
1512                     swap_em_state(&s_b, &s_a);
1513                     a   = b;
1514                     gpa = gpb;
1515                 }
1516 
1517                 /*
1518                  * Stop search as soon as we find a value smaller than the endpoints.
1519                  * Never run more than 20 steps, no matter what.
1520                  */
1521                 nminstep++;
1522             } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) && (nminstep < 20));
1523 
1524             if (std::fabs(epot_repl - s_min->epot) < fabs(s_min->epot) * GMX_REAL_EPS || nminstep >= 20)
1525             {
1526                 /* OK. We couldn't find a significantly lower energy.
1527                  * If beta==0 this was steepest descent, and then we give up.
1528                  * If not, set beta=0 and restart with steepest descent before quitting.
1529                  */
1530                 if (beta == 0.0)
1531                 {
1532                     /* Converged */
1533                     converged = TRUE;
1534                     break;
1535                 }
1536                 else
1537                 {
1538                     /* Reset memory before giving up */
1539                     beta = 0.0;
1540                     continue;
1541                 }
1542             }
1543 
1544             /* Select min energy state of A & C, put the best in B.
1545              */
1546             if (s_c->epot < s_a->epot)
1547             {
1548                 if (debug)
1549                 {
1550                     fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n", s_c->epot,
1551                             s_a->epot);
1552                 }
1553                 swap_em_state(&s_b, &s_c);
1554                 gpb = gpc;
1555             }
1556             else
1557             {
1558                 if (debug)
1559                 {
1560                     fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n", s_a->epot,
1561                             s_c->epot);
1562                 }
1563                 swap_em_state(&s_b, &s_a);
1564                 gpb = gpa;
1565             }
1566         }
1567         else
1568         {
1569             if (debug)
1570             {
1571                 fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n", s_c->epot);
1572             }
1573             swap_em_state(&s_b, &s_c);
1574             gpb = gpc;
1575         }
1576 
1577         /* new search direction */
1578         /* beta = 0 means forget all memory and restart with steepest descents. */
1579         if (nstcg && ((step % nstcg) == 0))
1580         {
1581             beta = 0.0;
1582         }
1583         else
1584         {
1585             /* s_min->fnorm cannot be zero, because then we would have converged
1586              * and broken out.
1587              */
1588 
1589             /* Polak-Ribiere update.
1590              * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1591              */
1592             beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
1593         }
1594         /* Limit beta to prevent oscillations */
1595         if (fabs(beta) > 5.0)
1596         {
1597             beta = 0.0;
1598         }
1599 
1600 
1601         /* update positions */
1602         swap_em_state(&s_min, &s_b);
1603         gpa = gpb;
1604 
1605         /* Print it if necessary */
1606         if (MASTER(cr))
1607         {
1608             if (mdrunOptions.verbose)
1609             {
1610                 double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1611                 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step,
1612                         s_min->epot, s_min->fnorm / sqrtNumAtoms, s_min->fmax, s_min->a_fmax + 1);
1613                 fflush(stderr);
1614             }
1615             /* Store the new (lower) energies */
1616             matrix nullBox = {};
1617             energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
1618                                              enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0,
1619                                              nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
1620 
1621             do_log = do_per_step(step, inputrec->nstlog);
1622             do_ene = do_per_step(step, inputrec->nstenergy);
1623 
1624             imdSession->fillEnergyRecord(step, TRUE);
1625 
1626             if (do_log)
1627             {
1628                 EnergyOutput::printHeader(fplog, step, step);
1629             }
1630             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
1631                                                do_log ? fplog : nullptr, step, step,
1632                                                fr->fcdata.get(), nullptr);
1633         }
1634 
1635         /* Send energies and positions to the IMD client if bIMD is TRUE. */
1636         if (MASTER(cr) && imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0))
1637         {
1638             imdSession->sendPositionsAndEnergies();
1639         }
1640 
1641         /* Stop when the maximum force lies below tolerance.
1642          * If we have reached machine precision, converged is already set to true.
1643          */
1644         converged = converged || (s_min->fmax < inputrec->em_tol);
1645 
1646     } /* End of the loop */
1647 
1648     if (converged)
1649     {
1650         step--; /* we never took that last step in this case */
1651     }
1652     if (s_min->fmax > inputrec->em_tol)
1653     {
1654         if (MASTER(cr))
1655         {
1656             warn_step(fplog, inputrec->em_tol, s_min->fmax, step - 1 == number_steps, FALSE);
1657         }
1658         converged = FALSE;
1659     }
1660 
1661     if (MASTER(cr))
1662     {
1663         /* If we printed energy and/or logfile last step (which was the last step)
1664          * we don't have to do it again, but otherwise print the final values.
1665          */
1666         if (!do_log)
1667         {
1668             /* Write final value to log since we didn't do anything the last step */
1669             EnergyOutput::printHeader(fplog, step, step);
1670         }
1671         if (!do_ene || !do_log)
1672         {
1673             /* Write final energy file entries */
1674             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
1675                                                !do_log ? fplog : nullptr, step, step,
1676                                                fr->fcdata.get(), nullptr);
1677         }
1678     }
1679 
1680     /* Print some stuff... */
1681     if (MASTER(cr))
1682     {
1683         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
1684     }
1685 
1686     /* IMPORTANT!
1687      * For accurate normal mode calculation it is imperative that we
1688      * store the last conformation into the full precision binary trajectory.
1689      *
1690      * However, we should only do it if we did NOT already write this step
1691      * above (which we did if do_x or do_f was true).
1692      */
1693     /* Note that with 0 < nstfout != nstxout we can end up with two frames
1694      * in the trajectory with the same step number.
1695      */
1696     do_x = !do_per_step(step, inputrec->nstxout);
1697     do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
1698 
1699     write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec,
1700                   step, s_min, state_global, observablesHistory);
1701 
1702 
1703     if (MASTER(cr))
1704     {
1705         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1706         print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
1707         print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps, s_min, sqrtNumAtoms);
1708 
1709         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
1710     }
1711 
1712     finish_em(cr, outf, walltime_accounting, wcycle);
1713 
1714     /* To print the actual number of steps we needed somewhere */
1715     walltime_accounting_set_nsteps_done(walltime_accounting, step);
1716 }
1717 
1718 
do_lbfgs()1719 void LegacySimulator::do_lbfgs()
1720 {
1721     static const char* LBFGS = "Low-Memory BFGS Minimizer";
1722     em_state_t         ems;
1723     gmx_localtop_t     top(top_global->ffparams);
1724     gmx_global_stat_t  gstat;
1725     int                ncorr, nmaxcorr, point, cp, neval, nminstep;
1726     double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
1727     real *             rho, *alpha, *p, *s, **dx, **dg;
1728     real               a, b, c, maxdelta, delta;
1729     real               diag, Epot0;
1730     real               dgdx, dgdg, sq, yr, beta;
1731     gmx_bool           converged;
1732     rvec               mu_tot = { 0 };
1733     gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
1734     tensor             vir, pres;
1735     int                start, end, number_steps;
1736     int                i, k, m, n, gf, step;
1737     int                mdof_flags;
1738     auto               mdatoms = mdAtoms->mdatoms();
1739 
1740     GMX_LOG(mdlog.info)
1741             .asParagraph()
1742             .appendText(
1743                     "Note that activating L-BFGS energy minimization via the "
1744                     "integrator .mdp option and the command gmx mdrun may "
1745                     "be available in a different form in a future version of GROMACS, "
1746                     "e.g. gmx minimize and an .mdp option.");
1747 
1748     if (PAR(cr))
1749     {
1750         gmx_fatal(FARGS, "L-BFGS minimization only supports a single rank");
1751     }
1752 
1753     if (nullptr != constr)
1754     {
1755         gmx_fatal(
1756                 FARGS,
1757                 "The combination of constraints and L-BFGS minimization is not implemented. Either "
1758                 "do not use constraints, or use another minimizer (e.g. steepest descent).");
1759     }
1760 
1761     n        = 3 * state_global->natoms;
1762     nmaxcorr = inputrec->nbfgscorr;
1763 
1764     snew(frozen, n);
1765 
1766     snew(p, n);
1767     snew(rho, nmaxcorr);
1768     snew(alpha, nmaxcorr);
1769 
1770     snew(dx, nmaxcorr);
1771     for (i = 0; i < nmaxcorr; i++)
1772     {
1773         snew(dx[i], n);
1774     }
1775 
1776     snew(dg, nmaxcorr);
1777     for (i = 0; i < nmaxcorr; i++)
1778     {
1779         snew(dg[i], n);
1780     }
1781 
1782     step  = 0;
1783     neval = 0;
1784 
1785     /* Init em */
1786     init_em(fplog, mdlog, LBFGS, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global,
1787             &ems, &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, nullptr);
1788     const bool        simulationsShareState = false;
1789     gmx_mdoutf*       outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
1790                                    mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
1791                                    StartingBehavior::NewSimulation, simulationsShareState, ms);
1792     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work,
1793                                    nullptr, false, StartingBehavior::NewSimulation,
1794                                    simulationsShareState, mdModulesNotifier);
1795 
1796     start = 0;
1797     end   = mdatoms->homenr;
1798 
1799     /* We need 4 working states */
1800     em_state_t  s0{}, s1{}, s2{}, s3{};
1801     em_state_t* sa   = &s0;
1802     em_state_t* sb   = &s1;
1803     em_state_t* sc   = &s2;
1804     em_state_t* last = &s3;
1805     /* Initialize by copying the state from ems (we could skip x and f here) */
1806     *sa = ems;
1807     *sb = ems;
1808     *sc = ems;
1809 
1810     /* Print to log file */
1811     print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
1812 
1813     do_log = do_ene = do_x = do_f = TRUE;
1814 
1815     /* Max number of steps */
1816     number_steps = inputrec->nsteps;
1817 
1818     /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1819     gf = 0;
1820     for (i = start; i < end; i++)
1821     {
1822         if (mdatoms->cFREEZE)
1823         {
1824             gf = mdatoms->cFREEZE[i];
1825         }
1826         for (m = 0; m < DIM; m++)
1827         {
1828             frozen[3 * i + m] = (inputrec->opts.nFreeze[gf][m] != 0);
1829         }
1830     }
1831     if (MASTER(cr))
1832     {
1833         sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
1834     }
1835     if (fplog)
1836     {
1837         sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
1838     }
1839 
1840     if (vsite)
1841     {
1842         vsite->construct(state_global->x, 1, {}, state_global->box);
1843     }
1844 
1845     /* Call the force routine and some auxiliary (neighboursearching etc.) */
1846     /* do_force always puts the charge groups in the box and shifts again
1847      * We do not unshift, so molecules are always whole
1848      */
1849     neval++;
1850     EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
1851                                      inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
1852                                      vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
1853     energyEvaluator.run(&ems, mu_tot, vir, pres, -1, TRUE);
1854 
1855     if (MASTER(cr))
1856     {
1857         /* Copy stuff to the energy bin for easy printing etc. */
1858         matrix nullBox = {};
1859         energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
1860                                          enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0,
1861                                          nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
1862 
1863         EnergyOutput::printHeader(fplog, step, step);
1864         energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step,
1865                                            step, fr->fcdata.get(), nullptr);
1866     }
1867 
1868     /* Set the initial step.
1869      * since it will be multiplied by the non-normalized search direction
1870      * vector (force vector the first time), we scale it by the
1871      * norm of the force.
1872      */
1873 
1874     if (MASTER(cr))
1875     {
1876         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1877         fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1878         fprintf(stderr, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
1879         fprintf(stderr, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
1880         fprintf(stderr, "\n");
1881         /* and copy to the log file too... */
1882         fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1883         fprintf(fplog, "   F-max             = %12.5e on atom %d\n", ems.fmax, ems.a_fmax + 1);
1884         fprintf(fplog, "   F-Norm            = %12.5e\n", ems.fnorm / sqrtNumAtoms);
1885         fprintf(fplog, "\n");
1886     }
1887 
1888     // Point is an index to the memory of search directions, where 0 is the first one.
1889     point = 0;
1890 
1891     // Set initial search direction to the force (-gradient), or 0 for frozen particles.
1892     real* fInit = static_cast<real*>(ems.f.view().force().data()[0]);
1893     for (i = 0; i < n; i++)
1894     {
1895         if (!frozen[i])
1896         {
1897             dx[point][i] = fInit[i]; /* Initial search direction */
1898         }
1899         else
1900         {
1901             dx[point][i] = 0;
1902         }
1903     }
1904 
1905     // Stepsize will be modified during the search, and actually it is not critical
1906     // (the main efficiency in the algorithm comes from changing directions), but
1907     // we still need an initial value, so estimate it as the inverse of the norm
1908     // so we take small steps where the potential fluctuates a lot.
1909     stepsize = 1.0 / ems.fnorm;
1910 
1911     /* Start the loop over BFGS steps.
1912      * Each successful step is counted, and we continue until
1913      * we either converge or reach the max number of steps.
1914      */
1915 
1916     ncorr = 0;
1917 
1918     /* Set the gradient from the force */
1919     converged = FALSE;
1920     for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
1921     {
1922 
1923         /* Write coordinates if necessary */
1924         do_x = do_per_step(step, inputrec->nstxout);
1925         do_f = do_per_step(step, inputrec->nstfout);
1926 
1927         mdof_flags = 0;
1928         if (do_x)
1929         {
1930             mdof_flags |= MDOF_X;
1931         }
1932 
1933         if (do_f)
1934         {
1935             mdof_flags |= MDOF_F;
1936         }
1937 
1938         if (inputrec->bIMD)
1939         {
1940             mdof_flags |= MDOF_IMD;
1941         }
1942 
1943         gmx::WriteCheckpointDataHolder checkpointDataHolder;
1944         mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags, top_global->natoms, step,
1945                                          static_cast<real>(step), &ems.s, state_global, observablesHistory,
1946                                          ems.f.view().force(), &checkpointDataHolder);
1947 
1948         /* Do the linesearching in the direction dx[point][0..(n-1)] */
1949 
1950         /* make s a pointer to current search direction - point=0 first time we get here */
1951         s = dx[point];
1952 
1953         real* xx = static_cast<real*>(ems.s.x.rvec_array()[0]);
1954         real* ff = static_cast<real*>(ems.f.view().force().data()[0]);
1955 
1956         // calculate line gradient in position A
1957         for (gpa = 0, i = 0; i < n; i++)
1958         {
1959             gpa -= s[i] * ff[i];
1960         }
1961 
1962         /* Calculate minimum allowed stepsize along the line, before the average (norm)
1963          * relative change in coordinate is smaller than precision
1964          */
1965         for (minstep = 0, i = 0; i < n; i++)
1966         {
1967             tmp = fabs(xx[i]);
1968             if (tmp < 1.0)
1969             {
1970                 tmp = 1.0;
1971             }
1972             tmp = s[i] / tmp;
1973             minstep += tmp * tmp;
1974         }
1975         minstep = GMX_REAL_EPS / sqrt(minstep / n);
1976 
1977         if (stepsize < minstep)
1978         {
1979             converged = TRUE;
1980             break;
1981         }
1982 
1983         // Before taking any steps along the line, store the old position
1984         *last       = ems;
1985         real* lastx = static_cast<real*>(last->s.x.data()[0]);
1986         real* lastf = static_cast<real*>(last->f.view().force().data()[0]);
1987         Epot0       = ems.epot;
1988 
1989         *sa = ems;
1990 
1991         /* Take a step downhill.
1992          * In theory, we should find the actual minimum of the function in this
1993          * direction, somewhere along the line.
1994          * That is quite possible, but it turns out to take 5-10 function evaluations
1995          * for each line. However, we dont really need to find the exact minimum -
1996          * it is much better to start a new BFGS step in a modified direction as soon
1997          * as we are close to it. This will save a lot of energy evaluations.
1998          *
1999          * In practice, we just try to take a single step.
2000          * If it worked (i.e. lowered the energy), we increase the stepsize but
2001          * continue straight to the next BFGS step without trying to find any minimum,
2002          * i.e. we change the search direction too. If the line was smooth, it is
2003          * likely we are in a smooth region, and then it makes sense to take longer
2004          * steps in the modified search direction too.
2005          *
2006          * If it didn't work (higher energy), there must be a minimum somewhere between
2007          * the old position and the new one. Then we need to start by finding a lower
2008          * value before we change search direction. Since the energy was apparently
2009          * quite rough, we need to decrease the step size.
2010          *
2011          * Due to the finite numerical accuracy, it turns out that it is a good idea
2012          * to accept a SMALL increase in energy, if the derivative is still downhill.
2013          * This leads to lower final energies in the tests I've done. / Erik
2014          */
2015 
2016         // State "A" is the first position along the line.
2017         // reference position along line is initially zero
2018         a = 0.0;
2019 
2020         // Check stepsize first. We do not allow displacements
2021         // larger than emstep.
2022         //
2023         do
2024         {
2025             // Pick a new position C by adding stepsize to A.
2026             c = a + stepsize;
2027 
2028             // Calculate what the largest change in any individual coordinate
2029             // would be (translation along line * gradient along line)
2030             maxdelta = 0;
2031             for (i = 0; i < n; i++)
2032             {
2033                 delta = c * s[i];
2034                 if (delta > maxdelta)
2035                 {
2036                     maxdelta = delta;
2037                 }
2038             }
2039             // If any displacement is larger than the stepsize limit, reduce the step
2040             if (maxdelta > inputrec->em_stepsize)
2041             {
2042                 stepsize *= 0.1;
2043             }
2044         } while (maxdelta > inputrec->em_stepsize);
2045 
2046         // Take a trial step and move the coordinate array xc[] to position C
2047         real* xc = static_cast<real*>(sc->s.x.rvec_array()[0]);
2048         for (i = 0; i < n; i++)
2049         {
2050             xc[i] = lastx[i] + c * s[i];
2051         }
2052 
2053         neval++;
2054         // Calculate energy for the trial step in position C
2055         energyEvaluator.run(sc, mu_tot, vir, pres, step, FALSE);
2056 
2057         // Calc line gradient in position C
2058         real* fc = static_cast<real*>(sc->f.view().force()[0]);
2059         for (gpc = 0, i = 0; i < n; i++)
2060         {
2061             gpc -= s[i] * fc[i]; /* f is negative gradient, thus the sign */
2062         }
2063         /* Sum the gradient along the line across CPUs */
2064         if (PAR(cr))
2065         {
2066             gmx_sumd(1, &gpc, cr);
2067         }
2068 
2069         // This is the max amount of increase in energy we tolerate.
2070         // By allowing VERY small changes (close to numerical precision) we
2071         // frequently find even better (lower) final energies.
2072         tmp = std::sqrt(GMX_REAL_EPS) * fabs(sa->epot);
2073 
2074         // Accept the step if the energy is lower in the new position C (compared to A),
2075         // or if it is not significantly higher and the line derivative is still negative.
2076         foundlower = sc->epot < sa->epot || (gpc < 0 && sc->epot < (sa->epot + tmp));
2077         // If true, great, we found a better energy. We no longer try to alter the
2078         // stepsize, but simply accept this new better position. The we select a new
2079         // search direction instead, which will be much more efficient than continuing
2080         // to take smaller steps along a line. Set fnorm based on the new C position,
2081         // which will be used to update the stepsize to 1/fnorm further down.
2082 
2083         // If false, the energy is NOT lower in point C, i.e. it will be the same
2084         // or higher than in point A. In this case it is pointless to move to point C,
2085         // so we will have to do more iterations along the same line to find a smaller
2086         // value in the interval [A=0.0,C].
2087         // Here, A is still 0.0, but that will change when we do a search in the interval
2088         // [0.0,C] below. That search we will do by interpolation or bisection rather
2089         // than with the stepsize, so no need to modify it. For the next search direction
2090         // it will be reset to 1/fnorm anyway.
2091 
2092         if (!foundlower)
2093         {
2094             // OK, if we didn't find a lower value we will have to locate one now - there must
2095             // be one in the interval [a,c].
2096             // The same thing is valid here, though: Don't spend dozens of iterations to find
2097             // the line minimum. We try to interpolate based on the derivative at the endpoints,
2098             // and only continue until we find a lower value. In most cases this means 1-2 iterations.
2099             // I also have a safeguard for potentially really pathological functions so we never
2100             // take more than 20 steps before we give up.
2101             // If we already found a lower value we just skip this step and continue to the update.
2102             real fnorm = 0;
2103             nminstep   = 0;
2104             do
2105             {
2106                 // Select a new trial point B in the interval [A,C].
2107                 // If the derivatives at points a & c have different sign we interpolate to zero,
2108                 // otherwise just do a bisection since there might be multiple minima/maxima
2109                 // inside the interval.
2110                 if (gpa < 0 && gpc > 0)
2111                 {
2112                     b = a + gpa * (a - c) / (gpc - gpa);
2113                 }
2114                 else
2115                 {
2116                     b = 0.5 * (a + c);
2117                 }
2118 
2119                 /* safeguard if interpolation close to machine accuracy causes errors:
2120                  * never go outside the interval
2121                  */
2122                 if (b <= a || b >= c)
2123                 {
2124                     b = 0.5 * (a + c);
2125                 }
2126 
2127                 // Take a trial step to point B
2128                 real* xb = static_cast<real*>(sb->s.x.rvec_array()[0]);
2129                 for (i = 0; i < n; i++)
2130                 {
2131                     xb[i] = lastx[i] + b * s[i];
2132                 }
2133 
2134                 neval++;
2135                 // Calculate energy for the trial step in point B
2136                 energyEvaluator.run(sb, mu_tot, vir, pres, step, FALSE);
2137                 fnorm = sb->fnorm;
2138 
2139                 // Calculate gradient in point B
2140                 real* fb = static_cast<real*>(sb->f.view().force()[0]);
2141                 for (gpb = 0, i = 0; i < n; i++)
2142                 {
2143                     gpb -= s[i] * fb[i]; /* f is negative gradient, thus the sign */
2144                 }
2145                 /* Sum the gradient along the line across CPUs */
2146                 if (PAR(cr))
2147                 {
2148                     gmx_sumd(1, &gpb, cr);
2149                 }
2150 
2151                 // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative
2152                 // at the new point B, and rename the endpoints of this new interval A and C.
2153                 if (gpb > 0)
2154                 {
2155                     /* Replace c endpoint with b */
2156                     c = b;
2157                     /* copy state b to c */
2158                     *sc = *sb;
2159                 }
2160                 else
2161                 {
2162                     /* Replace a endpoint with b */
2163                     a = b;
2164                     /* copy state b to a */
2165                     *sa = *sb;
2166                 }
2167 
2168                 /*
2169                  * Stop search as soon as we find a value smaller than the endpoints,
2170                  * or if the tolerance is below machine precision.
2171                  * Never run more than 20 steps, no matter what.
2172                  */
2173                 nminstep++;
2174             } while ((sb->epot > sa->epot || sb->epot > sc->epot) && (nminstep < 20));
2175 
2176             if (std::fabs(sb->epot - Epot0) < GMX_REAL_EPS || nminstep >= 20)
2177             {
2178                 /* OK. We couldn't find a significantly lower energy.
2179                  * If ncorr==0 this was steepest descent, and then we give up.
2180                  * If not, reset memory to restart as steepest descent before quitting.
2181                  */
2182                 if (ncorr == 0)
2183                 {
2184                     /* Converged */
2185                     converged = TRUE;
2186                     break;
2187                 }
2188                 else
2189                 {
2190                     /* Reset memory */
2191                     ncorr = 0;
2192                     /* Search in gradient direction */
2193                     for (i = 0; i < n; i++)
2194                     {
2195                         dx[point][i] = ff[i];
2196                     }
2197                     /* Reset stepsize */
2198                     stepsize = 1.0 / fnorm;
2199                     continue;
2200                 }
2201             }
2202 
2203             /* Select min energy state of A & C, put the best in xx/ff/Epot
2204              */
2205             if (sc->epot < sa->epot)
2206             {
2207                 /* Use state C */
2208                 ems        = *sc;
2209                 step_taken = c;
2210             }
2211             else
2212             {
2213                 /* Use state A */
2214                 ems        = *sa;
2215                 step_taken = a;
2216             }
2217         }
2218         else
2219         {
2220             /* found lower */
2221             /* Use state C */
2222             ems        = *sc;
2223             step_taken = c;
2224         }
2225 
2226         /* Update the memory information, and calculate a new
2227          * approximation of the inverse hessian
2228          */
2229 
2230         /* Have new data in Epot, xx, ff */
2231         if (ncorr < nmaxcorr)
2232         {
2233             ncorr++;
2234         }
2235 
2236         for (i = 0; i < n; i++)
2237         {
2238             dg[point][i] = lastf[i] - ff[i];
2239             dx[point][i] *= step_taken;
2240         }
2241 
2242         dgdg = 0;
2243         dgdx = 0;
2244         for (i = 0; i < n; i++)
2245         {
2246             dgdg += dg[point][i] * dg[point][i];
2247             dgdx += dg[point][i] * dx[point][i];
2248         }
2249 
2250         diag = dgdx / dgdg;
2251 
2252         rho[point] = 1.0 / dgdx;
2253         point++;
2254 
2255         if (point >= nmaxcorr)
2256         {
2257             point = 0;
2258         }
2259 
2260         /* Update */
2261         for (i = 0; i < n; i++)
2262         {
2263             p[i] = ff[i];
2264         }
2265 
2266         cp = point;
2267 
2268         /* Recursive update. First go back over the memory points */
2269         for (k = 0; k < ncorr; k++)
2270         {
2271             cp--;
2272             if (cp < 0)
2273             {
2274                 cp = ncorr - 1;
2275             }
2276 
2277             sq = 0;
2278             for (i = 0; i < n; i++)
2279             {
2280                 sq += dx[cp][i] * p[i];
2281             }
2282 
2283             alpha[cp] = rho[cp] * sq;
2284 
2285             for (i = 0; i < n; i++)
2286             {
2287                 p[i] -= alpha[cp] * dg[cp][i];
2288             }
2289         }
2290 
2291         for (i = 0; i < n; i++)
2292         {
2293             p[i] *= diag;
2294         }
2295 
2296         /* And then go forward again */
2297         for (k = 0; k < ncorr; k++)
2298         {
2299             yr = 0;
2300             for (i = 0; i < n; i++)
2301             {
2302                 yr += p[i] * dg[cp][i];
2303             }
2304 
2305             beta = rho[cp] * yr;
2306             beta = alpha[cp] - beta;
2307 
2308             for (i = 0; i < n; i++)
2309             {
2310                 p[i] += beta * dx[cp][i];
2311             }
2312 
2313             cp++;
2314             if (cp >= ncorr)
2315             {
2316                 cp = 0;
2317             }
2318         }
2319 
2320         for (i = 0; i < n; i++)
2321         {
2322             if (!frozen[i])
2323             {
2324                 dx[point][i] = p[i];
2325             }
2326             else
2327             {
2328                 dx[point][i] = 0;
2329             }
2330         }
2331 
2332         /* Print it if necessary */
2333         if (MASTER(cr))
2334         {
2335             if (mdrunOptions.verbose)
2336             {
2337                 double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2338                 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n", step,
2339                         ems.epot, ems.fnorm / sqrtNumAtoms, ems.fmax, ems.a_fmax + 1);
2340                 fflush(stderr);
2341             }
2342             /* Store the new (lower) energies */
2343             matrix nullBox = {};
2344             energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(step), mdatoms->tmass,
2345                                              enerd, nullptr, nullptr, nullBox, PTCouplingArrays(), 0,
2346                                              nullptr, nullptr, vir, pres, nullptr, mu_tot, constr);
2347 
2348             do_log = do_per_step(step, inputrec->nstlog);
2349             do_ene = do_per_step(step, inputrec->nstenergy);
2350 
2351             imdSession->fillEnergyRecord(step, TRUE);
2352 
2353             if (do_log)
2354             {
2355                 EnergyOutput::printHeader(fplog, step, step);
2356             }
2357             energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
2358                                                do_log ? fplog : nullptr, step, step,
2359                                                fr->fcdata.get(), nullptr);
2360         }
2361 
2362         /* Send x and E to IMD client, if bIMD is TRUE. */
2363         if (imdSession->run(step, TRUE, state_global->box, state_global->x.rvec_array(), 0) && MASTER(cr))
2364         {
2365             imdSession->sendPositionsAndEnergies();
2366         }
2367 
2368         // Reset stepsize in we are doing more iterations
2369         stepsize = 1.0;
2370 
2371         /* Stop when the maximum force lies below tolerance.
2372          * If we have reached machine precision, converged is already set to true.
2373          */
2374         converged = converged || (ems.fmax < inputrec->em_tol);
2375 
2376     } /* End of the loop */
2377 
2378     if (converged)
2379     {
2380         step--; /* we never took that last step in this case */
2381     }
2382     if (ems.fmax > inputrec->em_tol)
2383     {
2384         if (MASTER(cr))
2385         {
2386             warn_step(fplog, inputrec->em_tol, ems.fmax, step - 1 == number_steps, FALSE);
2387         }
2388         converged = FALSE;
2389     }
2390 
2391     /* If we printed energy and/or logfile last step (which was the last step)
2392      * we don't have to do it again, but otherwise print the final values.
2393      */
2394     if (!do_log) /* Write final value to log since we didn't do anythin last step */
2395     {
2396         EnergyOutput::printHeader(fplog, step, step);
2397     }
2398     if (!do_ene || !do_log) /* Write final energy file entries */
2399     {
2400         energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
2401                                            !do_log ? fplog : nullptr, step, step, fr->fcdata.get(),
2402                                            nullptr);
2403     }
2404 
2405     /* Print some stuff... */
2406     if (MASTER(cr))
2407     {
2408         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2409     }
2410 
2411     /* IMPORTANT!
2412      * For accurate normal mode calculation it is imperative that we
2413      * store the last conformation into the full precision binary trajectory.
2414      *
2415      * However, we should only do it if we did NOT already write this step
2416      * above (which we did if do_x or do_f was true).
2417      */
2418     do_x = !do_per_step(step, inputrec->nstxout);
2419     do_f = !do_per_step(step, inputrec->nstfout);
2420     write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm), top_global, inputrec,
2421                   step, &ems, state_global, observablesHistory);
2422 
2423     if (MASTER(cr))
2424     {
2425         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2426         print_converged(stderr, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
2427         print_converged(fplog, LBFGS, inputrec->em_tol, step, converged, number_steps, &ems, sqrtNumAtoms);
2428 
2429         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
2430     }
2431 
2432     finish_em(cr, outf, walltime_accounting, wcycle);
2433 
2434     /* To print the actual number of steps we needed somewhere */
2435     walltime_accounting_set_nsteps_done(walltime_accounting, step);
2436 }
2437 
do_steep()2438 void LegacySimulator::do_steep()
2439 {
2440     const char*       SD = "Steepest Descents";
2441     gmx_localtop_t    top(top_global->ffparams);
2442     gmx_global_stat_t gstat;
2443     real              stepsize;
2444     real              ustep;
2445     gmx_bool          bDone, bAbort, do_x, do_f;
2446     tensor            vir, pres;
2447     rvec              mu_tot = { 0 };
2448     int               nsteps;
2449     int               count          = 0;
2450     int               steps_accepted = 0;
2451     auto              mdatoms        = mdAtoms->mdatoms();
2452 
2453     GMX_LOG(mdlog.info)
2454             .asParagraph()
2455             .appendText(
2456                     "Note that activating steepest-descent energy minimization via the "
2457                     "integrator .mdp option and the command gmx mdrun may "
2458                     "be available in a different form in a future version of GROMACS, "
2459                     "e.g. gmx minimize and an .mdp option.");
2460 
2461     /* Create 2 states on the stack and extract pointers that we will swap */
2462     em_state_t  s0{}, s1{};
2463     em_state_t* s_min = &s0;
2464     em_state_t* s_try = &s1;
2465 
2466     /* Init em and store the local state in s_try */
2467     init_em(fplog, mdlog, SD, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global, s_try,
2468             &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, nullptr);
2469     const bool        simulationsShareState = false;
2470     gmx_mdoutf*       outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
2471                                    mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
2472                                    StartingBehavior::NewSimulation, simulationsShareState, ms);
2473     gmx::EnergyOutput energyOutput(mdoutf_get_fp_ene(outf), top_global, inputrec, pull_work,
2474                                    nullptr, false, StartingBehavior::NewSimulation,
2475                                    simulationsShareState, mdModulesNotifier);
2476 
2477     /* Print to log file  */
2478     print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
2479 
2480     /* Set variables for stepsize (in nm). This is the largest
2481      * step that we are going to make in any direction.
2482      */
2483     ustep    = inputrec->em_stepsize;
2484     stepsize = 0;
2485 
2486     /* Max number of steps  */
2487     nsteps = inputrec->nsteps;
2488 
2489     if (MASTER(cr))
2490     {
2491         /* Print to the screen  */
2492         sp_header(stderr, SD, inputrec->em_tol, nsteps);
2493     }
2494     if (fplog)
2495     {
2496         sp_header(fplog, SD, inputrec->em_tol, nsteps);
2497     }
2498     EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
2499                                      inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
2500                                      vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
2501 
2502     /**** HERE STARTS THE LOOP ****
2503      * count is the counter for the number of steps
2504      * bDone will be TRUE when the minimization has converged
2505      * bAbort will be TRUE when nsteps steps have been performed or when
2506      * the stepsize becomes smaller than is reasonable for machine precision
2507      */
2508     count  = 0;
2509     bDone  = FALSE;
2510     bAbort = FALSE;
2511     while (!bDone && !bAbort)
2512     {
2513         bAbort = (nsteps >= 0) && (count == nsteps);
2514 
2515         /* set new coordinates, except for first step */
2516         bool validStep = true;
2517         if (count > 0)
2518         {
2519             validStep = do_em_step(cr, inputrec, mdatoms, s_min, stepsize,
2520                                    s_min->f.view().forceWithPadding(), s_try, constr, count);
2521         }
2522 
2523         if (validStep)
2524         {
2525             energyEvaluator.run(s_try, mu_tot, vir, pres, count, count == 0);
2526         }
2527         else
2528         {
2529             // Signal constraint error during stepping with energy=inf
2530             s_try->epot = std::numeric_limits<real>::infinity();
2531         }
2532 
2533         if (MASTER(cr))
2534         {
2535             EnergyOutput::printHeader(fplog, count, count);
2536         }
2537 
2538         if (count == 0)
2539         {
2540             s_min->epot = s_try->epot;
2541         }
2542 
2543         /* Print it if necessary  */
2544         if (MASTER(cr))
2545         {
2546             if (mdrunOptions.verbose)
2547             {
2548                 fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2549                         count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax + 1,
2550                         ((count == 0) || (s_try->epot < s_min->epot)) ? '\n' : '\r');
2551                 fflush(stderr);
2552             }
2553 
2554             if ((count == 0) || (s_try->epot < s_min->epot))
2555             {
2556                 /* Store the new (lower) energies  */
2557                 matrix nullBox = {};
2558                 energyOutput.addDataAtEnergyStep(false, false, static_cast<double>(count),
2559                                                  mdatoms->tmass, enerd, nullptr, nullptr, nullBox,
2560                                                  PTCouplingArrays(), 0, nullptr, nullptr, vir, pres,
2561                                                  nullptr, mu_tot, constr);
2562 
2563                 imdSession->fillEnergyRecord(count, TRUE);
2564 
2565                 const bool do_dr = do_per_step(steps_accepted, inputrec->nstdisreout);
2566                 const bool do_or = do_per_step(steps_accepted, inputrec->nstorireout);
2567                 energyOutput.printStepToEnergyFile(mdoutf_get_fp_ene(outf), TRUE, do_dr, do_or,
2568                                                    fplog, count, count, fr->fcdata.get(), nullptr);
2569                 fflush(fplog);
2570             }
2571         }
2572 
2573         /* Now if the new energy is smaller than the previous...
2574          * or if this is the first step!
2575          * or if we did random steps!
2576          */
2577 
2578         if ((count == 0) || (s_try->epot < s_min->epot))
2579         {
2580             steps_accepted++;
2581 
2582             /* Test whether the convergence criterion is met...  */
2583             bDone = (s_try->fmax < inputrec->em_tol);
2584 
2585             /* Copy the arrays for force, positions and energy  */
2586             /* The 'Min' array always holds the coords and forces of the minimal
2587                sampled energy  */
2588             swap_em_state(&s_min, &s_try);
2589             if (count > 0)
2590             {
2591                 ustep *= 1.2;
2592             }
2593 
2594             /* Write to trn, if necessary */
2595             do_x = do_per_step(steps_accepted, inputrec->nstxout);
2596             do_f = do_per_step(steps_accepted, inputrec->nstfout);
2597             write_em_traj(fplog, cr, outf, do_x, do_f, nullptr, top_global, inputrec, count, s_min,
2598                           state_global, observablesHistory);
2599         }
2600         else
2601         {
2602             /* If energy is not smaller make the step smaller...  */
2603             ustep *= 0.5;
2604 
2605             if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
2606             {
2607                 /* Reload the old state */
2608                 em_dd_partition_system(fplog, mdlog, count, cr, top_global, inputrec, imdSession,
2609                                        pull_work, s_min, &top, mdAtoms, fr, vsite, constr, nrnb, wcycle);
2610             }
2611         }
2612 
2613         // If the force is very small after finishing minimization,
2614         // we risk dividing by zero when calculating the step size.
2615         // So we check first if the minimization has stopped before
2616         // trying to obtain a new step size.
2617         if (!bDone)
2618         {
2619             /* Determine new step  */
2620             stepsize = ustep / s_min->fmax;
2621         }
2622 
2623         /* Check if stepsize is too small, with 1 nm as a characteristic length */
2624 #if GMX_DOUBLE
2625         if (count == nsteps || ustep < 1e-12)
2626 #else
2627         if (count == nsteps || ustep < 1e-6)
2628 #endif
2629         {
2630             if (MASTER(cr))
2631             {
2632                 warn_step(fplog, inputrec->em_tol, s_min->fmax, count == nsteps, constr != nullptr);
2633             }
2634             bAbort = TRUE;
2635         }
2636 
2637         /* Send IMD energies and positions, if bIMD is TRUE. */
2638         if (imdSession->run(count, TRUE, MASTER(cr) ? state_global->box : nullptr,
2639                             MASTER(cr) ? state_global->x.rvec_array() : nullptr, 0)
2640             && MASTER(cr))
2641         {
2642             imdSession->sendPositionsAndEnergies();
2643         }
2644 
2645         count++;
2646     } /* End of the loop  */
2647 
2648     /* Print some data...  */
2649     if (MASTER(cr))
2650     {
2651         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2652     }
2653     write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout != 0, ftp2fn(efSTO, nfile, fnm),
2654                   top_global, inputrec, count, s_min, state_global, observablesHistory);
2655 
2656     if (MASTER(cr))
2657     {
2658         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2659 
2660         print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
2661         print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps, s_min, sqrtNumAtoms);
2662     }
2663 
2664     finish_em(cr, outf, walltime_accounting, wcycle);
2665 
2666     /* To print the actual number of steps we needed somewhere */
2667     inputrec->nsteps = count;
2668 
2669     walltime_accounting_set_nsteps_done(walltime_accounting, count);
2670 }
2671 
do_nm()2672 void LegacySimulator::do_nm()
2673 {
2674     const char*         NM = "Normal Mode Analysis";
2675     int                 nnodes;
2676     gmx_localtop_t      top(top_global->ffparams);
2677     gmx_global_stat_t   gstat;
2678     tensor              vir, pres;
2679     rvec                mu_tot = { 0 };
2680     rvec*               dfdx;
2681     gmx_bool            bSparse; /* use sparse matrix storage format */
2682     size_t              sz;
2683     gmx_sparsematrix_t* sparse_matrix = nullptr;
2684     real*               full_matrix   = nullptr;
2685 
2686     /* added with respect to mdrun */
2687     int  row, col;
2688     real der_range = 10.0 * std::sqrt(GMX_REAL_EPS);
2689     real x_min;
2690     bool bIsMaster = MASTER(cr);
2691     auto mdatoms   = mdAtoms->mdatoms();
2692 
2693     GMX_LOG(mdlog.info)
2694             .asParagraph()
2695             .appendText(
2696                     "Note that activating normal-mode analysis via the integrator "
2697                     ".mdp option and the command gmx mdrun may "
2698                     "be available in a different form in a future version of GROMACS, "
2699                     "e.g. gmx normal-modes.");
2700 
2701     if (constr != nullptr)
2702     {
2703         gmx_fatal(
2704                 FARGS,
2705                 "Constraints present with Normal Mode Analysis, this combination is not supported");
2706     }
2707 
2708     gmx_shellfc_t* shellfc;
2709 
2710     em_state_t state_work{};
2711 
2712     /* Init em and store the local state in state_minimum */
2713     init_em(fplog, mdlog, NM, cr, ms /*PLUMED*/, inputrec, imdSession, pull_work, state_global, top_global,
2714             &state_work, &top, nrnb, fr, mdAtoms, &gstat, vsite, constr, &shellfc);
2715     const bool  simulationsShareState = false;
2716     gmx_mdoutf* outf = init_mdoutf(fplog, nfile, fnm, mdrunOptions, cr, outputProvider,
2717                                    mdModulesNotifier, inputrec, top_global, nullptr, wcycle,
2718                                    StartingBehavior::NewSimulation, simulationsShareState, ms);
2719 
2720     std::vector<int>       atom_index = get_atom_index(top_global);
2721     std::vector<gmx::RVec> fneg(atom_index.size(), { 0, 0, 0 });
2722     snew(dfdx, atom_index.size());
2723 
2724 #if !GMX_DOUBLE
2725     if (bIsMaster)
2726     {
2727         fprintf(stderr,
2728                 "NOTE: This version of GROMACS has been compiled in single precision,\n"
2729                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2730                 "      GROMACS now uses sparse matrix storage, so the memory requirements\n"
2731                 "      are fairly modest even if you recompile in double precision.\n\n");
2732     }
2733 #endif
2734 
2735     /* Check if we can/should use sparse storage format.
2736      *
2737      * Sparse format is only useful when the Hessian itself is sparse, which it
2738      * will be when we use a cutoff.
2739      * For small systems (n<1000) it is easier to always use full matrix format, though.
2740      */
2741     if (EEL_FULL(fr->ic->eeltype) || fr->rlist == 0.0)
2742     {
2743         GMX_LOG(mdlog.warning)
2744                 .appendText("Non-cutoff electrostatics used, forcing full Hessian format.");
2745         bSparse = FALSE;
2746     }
2747     else if (atom_index.size() < 1000)
2748     {
2749         GMX_LOG(mdlog.warning)
2750                 .appendTextFormatted("Small system size (N=%zu), using full Hessian format.",
2751                                      atom_index.size());
2752         bSparse = FALSE;
2753     }
2754     else
2755     {
2756         GMX_LOG(mdlog.warning).appendText("Using compressed symmetric sparse Hessian format.");
2757         bSparse = TRUE;
2758     }
2759 
2760     /* Number of dimensions, based on real atoms, that is not vsites or shell */
2761     sz = DIM * atom_index.size();
2762 
2763     fprintf(stderr, "Allocating Hessian memory...\n\n");
2764 
2765     if (bSparse)
2766     {
2767         sparse_matrix                       = gmx_sparsematrix_init(sz);
2768         sparse_matrix->compressed_symmetric = TRUE;
2769     }
2770     else
2771     {
2772         snew(full_matrix, sz * sz);
2773     }
2774 
2775     /* Write start time and temperature */
2776     print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
2777 
2778     /* fudge nr of steps to nr of atoms */
2779     inputrec->nsteps = atom_index.size() * 2;
2780 
2781     if (bIsMaster)
2782     {
2783         fprintf(stderr, "starting normal mode calculation '%s'\n%" PRId64 " steps.\n\n",
2784                 *(top_global->name), inputrec->nsteps);
2785     }
2786 
2787     nnodes = cr->nnodes;
2788 
2789     /* Make evaluate_energy do a single node force calculation */
2790     cr->nnodes = 1;
2791     EnergyEvaluator energyEvaluator{ fplog,    mdlog,      cr,        ms,   top_global,      &top,
2792                                      inputrec, imdSession, pull_work, nrnb, wcycle,          gstat,
2793                                      vsite,    constr,     mdAtoms,   fr,   runScheduleWork, enerd };
2794     energyEvaluator.run(&state_work, mu_tot, vir, pres, -1, TRUE);
2795     cr->nnodes = nnodes;
2796 
2797     /* if forces are not small, warn user */
2798     get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, &state_work);
2799 
2800     GMX_LOG(mdlog.warning).appendTextFormatted("Maximum force:%12.5e", state_work.fmax);
2801     if (state_work.fmax > 1.0e-3)
2802     {
2803         GMX_LOG(mdlog.warning)
2804                 .appendText(
2805                         "The force is probably not small enough to "
2806                         "ensure that you are at a minimum.\n"
2807                         "Be aware that negative eigenvalues may occur\n"
2808                         "when the resulting matrix is diagonalized.");
2809     }
2810 
2811     /***********************************************************
2812      *
2813      *      Loop over all pairs in matrix
2814      *
2815      *      do_force called twice. Once with positive and
2816      *      once with negative displacement
2817      *
2818      ************************************************************/
2819 
2820     /* Steps are divided one by one over the nodes */
2821     bool bNS          = true;
2822     auto state_work_x = makeArrayRef(state_work.s.x);
2823     auto state_work_f = state_work.f.view().force();
2824     for (index aid = cr->nodeid; aid < ssize(atom_index); aid += nnodes)
2825     {
2826         size_t atom = atom_index[aid];
2827         for (size_t d = 0; d < DIM; d++)
2828         {
2829             int64_t step        = 0;
2830             int     force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
2831             double  t           = 0;
2832 
2833             x_min = state_work_x[atom][d];
2834 
2835             for (unsigned int dx = 0; (dx < 2); dx++)
2836             {
2837                 if (dx == 0)
2838                 {
2839                     state_work_x[atom][d] = x_min - der_range;
2840                 }
2841                 else
2842                 {
2843                     state_work_x[atom][d] = x_min + der_range;
2844                 }
2845 
2846                 /* Make evaluate_energy do a single node force calculation */
2847                 cr->nnodes = 1;
2848                 if (shellfc)
2849                 {
2850                     /* Now is the time to relax the shells */
2851                     relax_shell_flexcon(fplog, cr, ms, mdrunOptions.verbose, nullptr, step, inputrec,
2852                                         imdSession, pull_work, bNS, force_flags, &top, constr, enerd,
2853                                         state_work.s.natoms, state_work.s.x.arrayRefWithPadding(),
2854                                         state_work.s.v.arrayRefWithPadding(), state_work.s.box,
2855                                         state_work.s.lambda, &state_work.s.hist, &state_work.f.view(),
2856                                         vir, mdatoms, nrnb, wcycle, shellfc, fr, runScheduleWork, t,
2857                                         mu_tot, vsite, DDBalanceRegionHandler(nullptr));
2858                     bNS = false;
2859                     step++;
2860                 }
2861                 else
2862                 {
2863                     energyEvaluator.run(&state_work, mu_tot, vir, pres, aid * 2 + dx, FALSE);
2864                 }
2865 
2866                 cr->nnodes = nnodes;
2867 
2868                 if (dx == 0)
2869                 {
2870                     std::copy(state_work_f.begin(), state_work_f.begin() + atom_index.size(),
2871                               fneg.begin());
2872                 }
2873             }
2874 
2875             /* x is restored to original */
2876             state_work_x[atom][d] = x_min;
2877 
2878             for (size_t j = 0; j < atom_index.size(); j++)
2879             {
2880                 for (size_t k = 0; (k < DIM); k++)
2881                 {
2882                     dfdx[j][k] = -(state_work_f[atom_index[j]][k] - fneg[j][k]) / (2 * der_range);
2883                 }
2884             }
2885 
2886             if (!bIsMaster)
2887             {
2888 #if GMX_MPI
2889 #    define mpi_type GMX_MPI_REAL
2890                 MPI_Send(dfdx[0], atom_index.size() * DIM, mpi_type, MASTER(cr), cr->nodeid,
2891                          cr->mpi_comm_mygroup);
2892 #endif
2893             }
2894             else
2895             {
2896                 for (index node = 0; (node < nnodes && aid + node < ssize(atom_index)); node++)
2897                 {
2898                     if (node > 0)
2899                     {
2900 #if GMX_MPI
2901                         MPI_Status stat;
2902                         MPI_Recv(dfdx[0], atom_index.size() * DIM, mpi_type, node, node,
2903                                  cr->mpi_comm_mygroup, &stat);
2904 #    undef mpi_type
2905 #endif
2906                     }
2907 
2908                     row = (aid + node) * DIM + d;
2909 
2910                     for (size_t j = 0; j < atom_index.size(); j++)
2911                     {
2912                         for (size_t k = 0; k < DIM; k++)
2913                         {
2914                             col = j * DIM + k;
2915 
2916                             if (bSparse)
2917                             {
2918                                 if (col >= row && dfdx[j][k] != 0.0)
2919                                 {
2920                                     gmx_sparsematrix_increment_value(sparse_matrix, row, col, dfdx[j][k]);
2921                                 }
2922                             }
2923                             else
2924                             {
2925                                 full_matrix[row * sz + col] = dfdx[j][k];
2926                             }
2927                         }
2928                     }
2929                 }
2930             }
2931 
2932             if (mdrunOptions.verbose && fplog)
2933             {
2934                 fflush(fplog);
2935             }
2936         }
2937         /* write progress */
2938         if (bIsMaster && mdrunOptions.verbose)
2939         {
2940             fprintf(stderr, "\rFinished step %d out of %td",
2941                     std::min<int>(atom + nnodes, atom_index.size()), ssize(atom_index));
2942             fflush(stderr);
2943         }
2944     }
2945 
2946     if (bIsMaster)
2947     {
2948         fprintf(stderr, "\n\nWriting Hessian...\n");
2949         gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
2950     }
2951 
2952     finish_em(cr, outf, walltime_accounting, wcycle);
2953 
2954     walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size() * 2);
2955 }
2956 
2957 } // namespace gmx
2958