1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2015,2016,2017,2018,2019, The GROMACS development team.
5  * Copyright (c) 2020,2021, by the GROMACS development team, led by
6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7  * and including many others, as listed in the AUTHORS file in the
8  * top-level source directory and at http://www.gromacs.org.
9  *
10  * GROMACS is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public License
12  * as published by the Free Software Foundation; either version 2.1
13  * of the License, or (at your option) any later version.
14  *
15  * GROMACS is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with GROMACS; if not, see
22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
24  *
25  * If you want to redistribute modifications to GROMACS, please
26  * consider that scientific software is very special. Version
27  * control is crucial - bugs must be traceable. We will be happy to
28  * consider code for inclusion in the official distribution, but
29  * derived work must not be called official GROMACS. Details are found
30  * in the README & COPYING files - if they are missing, get the
31  * official version at http://www.gromacs.org.
32  *
33  * To help us fund GROMACS development, we humbly ask that you cite
34  * the research papers on the package. Check out http://www.gromacs.org.
35  */
36 
37 /*! \internal \file
38  * \brief
39  * Implements the BiasState class.
40  *
41  * \author Viveca Lindahl
42  * \author Berk Hess <hess@kth.se>
43  * \ingroup module_awh
44  */
45 
46 #include "gmxpre.h"
47 
48 #include "biasstate.h"
49 
50 #include <cassert>
51 #include <cmath>
52 #include <cstdio>
53 #include <cstdlib>
54 #include <cstring>
55 
56 #include <algorithm>
57 #include <optional>
58 
59 #include "gromacs/fileio/gmxfio.h"
60 #include "gromacs/fileio/xvgr.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/math/utilities.h"
63 #include "gromacs/mdrunutility/multisim.h"
64 #include "gromacs/mdtypes/awh_history.h"
65 #include "gromacs/mdtypes/awh_params.h"
66 #include "gromacs/mdtypes/commrec.h"
67 #include "gromacs/simd/simd.h"
68 #include "gromacs/simd/simd_math.h"
69 #include "gromacs/utility/arrayref.h"
70 #include "gromacs/utility/exceptions.h"
71 #include "gromacs/utility/gmxassert.h"
72 #include "gromacs/utility/smalloc.h"
73 #include "gromacs/utility/stringutil.h"
74 
75 #include "biasgrid.h"
76 #include "pointstate.h"
77 
78 namespace gmx
79 {
80 
81 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
82 {
83     GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
84 
85     /* The PMF is just the negative of the log of the sampled PMF histogram.
86      * Points with zero target weight are ignored; they will mostly contain noise.
87      */
88     for (size_t i = 0; i < points_.size(); i++)
89     {
90         pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
91     }
92 }
93 
94 namespace
95 {
96 
97 /*! \brief
98  * Sum an array over all simulations on the master rank of each simulation.
99  *
100  * \param[in,out] arrayRef      The data to sum.
101  * \param[in]     multiSimComm  Struct for multi-simulation communication.
102  */
103 void sumOverSimulations(gmx::ArrayRef<int> arrayRef, const gmx_multisim_t* multiSimComm)
104 {
105     gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
106 }
107 
108 /*! \brief
109  * Sum an array over all simulations on the master rank of each simulation.
110  *
111  * \param[in,out] arrayRef      The data to sum.
112  * \param[in]     multiSimComm  Struct for multi-simulation communication.
113  */
114 void sumOverSimulations(gmx::ArrayRef<double> arrayRef, const gmx_multisim_t* multiSimComm)
115 {
116     gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
117 }
118 
119 /*! \brief
120  * Sum an array over all simulations on all ranks of each simulation.
121  *
122  * This assumes the data is identical on all ranks within each simulation.
123  *
124  * \param[in,out] arrayRef      The data to sum.
125  * \param[in]     commRecord    Struct for intra-simulation communication.
126  * \param[in]     multiSimComm  Struct for multi-simulation communication.
127  */
128 template<typename T>
129 void sumOverSimulations(gmx::ArrayRef<T> arrayRef, const t_commrec* commRecord, const gmx_multisim_t* multiSimComm)
130 {
131     if (MASTER(commRecord))
132     {
133         sumOverSimulations(arrayRef, multiSimComm);
134     }
135     if (commRecord->nnodes > 1)
136     {
137         gmx_bcast(arrayRef.size() * sizeof(T), arrayRef.data(), commRecord->mpi_comm_mygroup);
138     }
139 }
140 
141 /*! \brief
142  * Sum PMF over multiple simulations, when requested.
143  *
144  * \param[in,out] pointState         The state of the points in the bias.
145  * \param[in]     numSharedUpdate    The number of biases sharing the histogram.
146  * \param[in]     commRecord         Struct for intra-simulation communication.
147  * \param[in]     multiSimComm       Struct for multi-simulation communication.
148  */
149 void sumPmf(gmx::ArrayRef<PointState> pointState,
150             int                       numSharedUpdate,
151             const t_commrec*          commRecord,
152             const gmx_multisim_t*     multiSimComm)
153 {
154     if (numSharedUpdate == 1)
155     {
156         return;
157     }
158     GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->numSimulations_ == 0,
159                "numSharedUpdate should be a multiple of multiSimComm->numSimulations_");
160     GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
161                "Sharing within a simulation is not implemented (yet)");
162 
163     std::vector<double> buffer(pointState.size());
164 
165     /* Need to temporarily exponentiate the log weights to sum over simulations */
166     for (size_t i = 0; i < buffer.size(); i++)
167     {
168         buffer[i] = pointState[i].inTargetRegion() ? std::exp(pointState[i].logPmfSum()) : 0;
169     }
170 
171     sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
172 
173     /* Take log again to get (non-normalized) PMF */
174     double normFac = 1.0 / numSharedUpdate;
175     for (gmx::index i = 0; i < pointState.ssize(); i++)
176     {
177         if (pointState[i].inTargetRegion())
178         {
179             pointState[i].setLogPmfSum(std::log(buffer[i] * normFac));
180         }
181     }
182 }
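/* Illustration of the sharing step above (hypothetical numbers, not part of the
 * algorithm): with two sharing simulations (numSharedUpdate = 2) and a point whose
 * log PMF sums are -1.0 and -3.0, the shared value becomes
 *   log(0.5 * (exp(-1.0) + exp(-3.0))) = log(0.5 * (0.3679 + 0.0498)) ~ -1.57,
 * i.e. the histograms are averaged in linear space before taking the log again.
 */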
183 
184 /*! \brief
185  * Find the minimum free energy value.
186  *
187  * \param[in] pointState  The state of the points.
188  * \returns the minimum free energy value.
189  */
190 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
191 {
192     double fMin = GMX_FLOAT_MAX;
193 
194     for (auto const& ps : pointState)
195     {
196         if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
197         {
198             fMin = ps.freeEnergy();
199         }
200     }
201 
202     return fMin;
203 }
204 
205 /*! \brief
206  * Find and return the log of the probability weight of a point given a coordinate value.
207  *
208  * The unnormalized weight is given by
209  * w(point|value) = exp(bias(point) - U(value,point)),
210  * where U is a harmonic umbrella potential.
211  *
212  * \param[in] dimParams              The bias dimension parameters.
213  * \param[in] points                 The point state.
214  * \param[in] grid                   The grid.
215  * \param[in] pointIndex             Point to evaluate probability weight for.
216  * \param[in] pointBias              Bias for the point (as a log weight).
217  * \param[in] value                  Coordinate value.
218  * \param[in] neighborLambdaEnergies The energy of the system in neighboring lambda states. Can be
219  * empty when there are no free energy lambda state dimensions.
220  * \param[in] gridpointIndex         The index of the current grid point.
221  * \returns the log of the biased probability weight.
222  */
223 double biasedLogWeightFromPoint(const std::vector<DimParams>&  dimParams,
224                                 const std::vector<PointState>& points,
225                                 const BiasGrid&                grid,
226                                 int                            pointIndex,
227                                 double                         pointBias,
228                                 const awh_dvec                 value,
229                                 gmx::ArrayRef<const double>    neighborLambdaEnergies,
230                                 int                            gridpointIndex)
231 {
232     double logWeight = detail::c_largeNegativeExponent;
233 
234     /* Only points in the target region have non-zero weight */
235     if (points[pointIndex].inTargetRegion())
236     {
237         logWeight = pointBias;
238 
239         /* Add potential for all parameter dimensions */
240         for (size_t d = 0; d < dimParams.size(); d++)
241         {
242             if (dimParams[d].isFepLambdaDimension())
243             {
244                 /* If this is not a sampling step or if this function is called from
245                  * calcConvolvedBias(), when writing energy subblocks, neighborLambdaEnergies will
246                  * be empty. No convolution is required along the lambda dimension. */
247                 if (!neighborLambdaEnergies.empty())
248                 {
249                     const int pointLambdaIndex     = grid.point(pointIndex).coordValue[d];
250                     const int gridpointLambdaIndex = grid.point(gridpointIndex).coordValue[d];
251                     logWeight -= dimParams[d].fepDimParams().beta
252                                  * (neighborLambdaEnergies[pointLambdaIndex]
253                                     - neighborLambdaEnergies[gridpointLambdaIndex]);
254                 }
255             }
256             else
257             {
258                 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
259                 logWeight -= 0.5 * dimParams[d].pullDimParams().betak * dev * dev;
260             }
261         }
262     }
263     return logWeight;
264 }
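/* Worked example for a single pull dimension (illustrative, hypothetical values):
 * with betak (= beta*k) = 100 nm^-2 and a deviation dev = 0.1 nm between the point
 * center and the coordinate value,
 *   logWeight = pointBias - 0.5 * 100 * 0.1 * 0.1 = pointBias - 0.5,
 * so the weight decays as a Gaussian around each grid point.
 */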
265 
266 /*! \brief
267  * Calculates the marginal distribution (marginal probability) for each value along
268  * a free energy lambda axis.
269  * The marginal distribution of one coordinate dimension value is the sum of the probability
270  * distribution of all values (herein all neighbor values) with the same value in the dimension
271  * of interest.
272  * \param[in] grid               The bias grid.
273  * \param[in] neighbors          The points to use for the calculation of the marginal distribution.
274  * \param[in] probWeightNeighbor Probability weights of the neighbors.
275  * \returns The calculated marginal distribution in a 1D array with
276  * as many elements as there are points along the axis of interest.
277  */
278 std::vector<double> calculateFELambdaMarginalDistribution(const BiasGrid&          grid,
279                                                           gmx::ArrayRef<const int> neighbors,
280                                                           gmx::ArrayRef<const double> probWeightNeighbor)
281 {
282     const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
283     GMX_RELEASE_ASSERT(lambdaAxisIndex,
284                        "There must be a free energy lambda axis in order to calculate the free "
285                        "energy lambda marginal distribution.");
286     const int           numFepLambdaStates = grid.numFepLambdaStates();
287     std::vector<double> lambdaMarginalDistribution(numFepLambdaStates, 0);
288 
289     for (size_t i = 0; i < neighbors.size(); i++)
290     {
291         const int neighbor    = neighbors[i];
292         const int lambdaState = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
293         lambdaMarginalDistribution[lambdaState] += probWeightNeighbor[i];
294     }
295     return lambdaMarginalDistribution;
296 }
297 
298 } // namespace
299 
300 void BiasState::calcConvolvedPmf(const std::vector<DimParams>& dimParams,
301                                  const BiasGrid&               grid,
302                                  std::vector<float>*           convolvedPmf) const
303 {
304     size_t numPoints = grid.numPoints();
305 
306     convolvedPmf->resize(numPoints);
307 
308     /* Get the PMF to convolve. */
309     std::vector<float> pmf(numPoints);
310     getPmf(pmf);
311 
312     for (size_t m = 0; m < numPoints; m++)
313     {
314         double           freeEnergyWeights = 0;
315         const GridPoint& point             = grid.point(m);
316         for (auto& neighbor : point.neighbor)
317         {
318             /* Do not convolve the bias along a lambda axis - only use the pmf from the current point */
319             if (!pointsHaveDifferentLambda(grid, m, neighbor))
320             {
321                 /* The negative PMF is a positive bias. */
322                 double biasNeighbor = -pmf[neighbor];
323 
324                 /* Add the convolved PMF weights for the neighbors of this point.
325                 Note that this function only adds points within the target > 0 region.
326                 Sum weights, take the logarithm last to get the free energy. */
327                 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
328                                                             biasNeighbor, point.coordValue, {}, m);
329                 freeEnergyWeights += std::exp(logWeight);
330             }
331         }
332 
333         GMX_RELEASE_ASSERT(freeEnergyWeights > 0,
334                            "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
335         (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
336     }
337 }
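/* In equation form (a restatement of the loop above, not extra logic), the convolved
 * PMF at point m is
 *   F_conv(m) = -log( sum_n exp( -pmf[n] - U(x_m, n) ) ),
 * where the sum runs over the neighbors n of m that share the same lambda state and
 * U is the harmonic umbrella potential evaluated at the coordinate value of m.
 */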
338 
339 namespace
340 {
341 
342 /*! \brief
343  * Updates the target distribution for all points.
344  *
345  * The target distribution is always updated for all points
346  * at the same time.
347  *
348  * \param[in,out] pointState  The state of all points.
349  * \param[in]     params      The bias parameters.
350  */
351 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState, const BiasParams& params)
352 {
353     double freeEnergyCutoff = 0;
354     if (params.eTarget == eawhtargetCUTOFF)
355     {
356         freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
357     }
358 
359     double sumTarget = 0;
360     for (PointState& ps : pointState)
361     {
362         sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
363     }
364     GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
365 
366     /* Normalize to 1 */
367     double invSum = 1.0 / sumTarget;
368     for (PointState& ps : pointState)
369     {
370         ps.scaleTarget(invSum);
371     }
372 }
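/* Note on the flow above: updateTargetWeight() returns the (unnormalized) target
 * weight of each point, taking the free energy cutoff into account when the cutoff
 * target type is used; the second loop then rescales all weights by 1/sumTarget so
 * that the target distribution sums to exactly 1 over the grid.
 */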
373 
374 /*! \brief
375  * Puts together a string describing a grid point.
376  *
377  * \param[in] grid         The grid.
378  * \param[in] point        BiasGrid point index.
379  * \returns a string for the point.
380  */
381 std::string gridPointValueString(const BiasGrid& grid, int point)
382 {
383     std::string pointString;
384 
385     pointString += "(";
386 
387     for (int d = 0; d < grid.numDimensions(); d++)
388     {
389         pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
390         if (d < grid.numDimensions() - 1)
391         {
392             pointString += ",";
393         }
394         else
395         {
396             pointString += ")";
397         }
398     }
399 
400     return pointString;
401 }
402 
403 } // namespace
404 
405 int BiasState::warnForHistogramAnomalies(const BiasGrid& grid, int biasIndex, double t, FILE* fplog, int maxNumWarnings) const
406 {
407     GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is a log file.");
408     const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
409 
410     /* Sum up the histograms and get their normalization */
411     double sumVisits  = 0;
412     double sumWeights = 0;
413     for (auto& pointState : points_)
414     {
415         if (pointState.inTargetRegion())
416         {
417             sumVisits += pointState.numVisitsTot();
418             sumWeights += pointState.weightSumTot();
419         }
420     }
421     GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
422     GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
423     double invNormVisits = 1.0 / sumVisits;
424     double invNormWeight = 1.0 / sumWeights;
425 
426     /* Check all points for warnings */
427     int    numWarnings = 0;
428     size_t numPoints   = grid.numPoints();
429     for (size_t m = 0; m < numPoints; m++)
430     {
431         /* Skip points close to boundary or non-target region */
432         const GridPoint& gridPoint = grid.point(m);
433         bool             skipPoint = false;
434         for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
435         {
436             int neighbor = gridPoint.neighbor[n];
437             skipPoint    = !points_[neighbor].inTargetRegion();
438             for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
439             {
440                 const GridPoint& neighborPoint = grid.point(neighbor);
441                 skipPoint                      = neighborPoint.index[d] == 0
442                             || neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
443             }
444         }
445 
446         /* Warn if the coordinate distribution somewhere is less than a certain fraction of the target distribution */
447         const double relativeWeight = points_[m].weightSumTot() * invNormWeight;
448         const double relativeVisits = points_[m].numVisitsTot() * invNormVisits;
449         if (!skipPoint && relativeVisits < relativeWeight * maxHistogramRatio)
450         {
451             std::string pointValueString = gridPointValueString(grid, m);
452             std::string warningMessage   = gmx::formatString(
453                     "\nawh%d warning: "
454                     "at t = %g ps the obtained coordinate distribution at coordinate value %s "
455                     "is less than a fraction %g of the reference distribution at that point. "
456                     "If you are not certain about your settings you might want to increase your "
457                     "pull force constant or "
458                     "modify your sampling region.\n",
459                     biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
460             gmx::TextLineWrapper wrapper;
461             wrapper.settings().setLineLength(c_linewidth);
462             fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
463 
464             numWarnings++;
465         }
466         if (numWarnings >= maxNumWarnings)
467         {
468             break;
469         }
470     }
471 
472     return numWarnings;
473 }
474 
475 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams>& dimParams,
476                                                 const BiasGrid&               grid,
477                                                 int                           point,
478                                                 ArrayRef<const double>        neighborLambdaDhdl,
479                                                 gmx::ArrayRef<double>         force) const
480 {
481     double potential = 0;
482     for (size_t d = 0; d < dimParams.size(); d++)
483     {
484         if (dimParams[d].isFepLambdaDimension())
485         {
486             if (!neighborLambdaDhdl.empty())
487             {
488                 const int coordpointLambdaIndex = grid.point(point).coordValue[d];
489                 force[d]                        = neighborLambdaDhdl[coordpointLambdaIndex];
490                 /* The potential should not be affected by the lambda dimension. */
491             }
492         }
493         else
494         {
495             double deviation =
496                     getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
497             double k = dimParams[d].pullDimParams().k;
498 
499             /* Force from harmonic potential 0.5*k*dev^2 */
500             force[d] = -k * deviation;
501             potential += 0.5 * k * deviation * deviation;
502         }
503     }
504 
505     return potential;
506 }
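/* Worked numbers for one harmonic dimension (illustrative, hypothetical values):
 * with k = 1000 kJ/(mol nm^2) and deviation = 0.05 nm,
 *   force     = -k * deviation        = -50   kJ/(mol nm)
 *   potential = 0.5 * k * deviation^2 =  1.25 kJ/mol
 * For a lambda dimension the force is instead taken directly from dH/dlambda and no
 * harmonic term is added to the potential.
 */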
507 
508 void BiasState::calcConvolvedForce(const std::vector<DimParams>& dimParams,
509                                    const BiasGrid&               grid,
510                                    gmx::ArrayRef<const double>   probWeightNeighbor,
511                                    ArrayRef<const double>        neighborLambdaDhdl,
512                                    gmx::ArrayRef<double>         forceWorkBuffer,
513                                    gmx::ArrayRef<double>         force) const
514 {
515     for (size_t d = 0; d < dimParams.size(); d++)
516     {
517         force[d] = 0;
518     }
519 
520     /* Only neighboring points have non-negligible contribution. */
521     const std::vector<int>& neighbor          = grid.point(coordState_.gridpointIndex()).neighbor;
522     gmx::ArrayRef<double>   forceFromNeighbor = forceWorkBuffer;
523     for (size_t n = 0; n < neighbor.size(); n++)
524     {
525         double weightNeighbor = probWeightNeighbor[n];
526         int    indexNeighbor  = neighbor[n];
527 
528         /* Get the umbrella force from this point. The returned potential is ignored here. */
529         calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor, neighborLambdaDhdl, forceFromNeighbor);
530 
531         /* Add the weighted umbrella force to the convolved force. */
532         for (size_t d = 0; d < dimParams.size(); d++)
533         {
534             force[d] += forceFromNeighbor[d] * weightNeighbor;
535         }
536     }
537 }
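/* In short, the convolved force is the probability-weighted average of the umbrella
 * forces of the neighboring points:
 *   F_conv[d] = sum_n P(n | x) * F_umbrella,n[d],
 * where P(n | x) are the normalized weights passed in probWeightNeighbor.
 */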
538 
539 double BiasState::moveUmbrella(const std::vector<DimParams>& dimParams,
540                                const BiasGrid&               grid,
541                                gmx::ArrayRef<const double>   probWeightNeighbor,
542                                ArrayRef<const double>        neighborLambdaDhdl,
543                                gmx::ArrayRef<double>         biasForce,
544                                int64_t                       step,
545                                int64_t                       seed,
546                                int                           indexSeed)
547 {
548     /* Generate and set a new coordinate reference value */
549     coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor,
550                                         step, seed, indexSeed);
551 
552     std::vector<double> newForce(dimParams.size());
553     double              newPotential = calcUmbrellaForceAndPotential(
554             dimParams, grid, coordState_.umbrellaGridpoint(), neighborLambdaDhdl, newForce);
555 
556     /*  A modification of the reference value at time t will lead to a different
557         force over t-dt/2 to t and over t to t+dt/2. For high switching rates
558         this means the force and velocity will change signs roughly as often.
559         To avoid any issues we take the average of the previous and new force
560         at steps when the reference value has been moved. E.g., if the ref. value
561         is set every step to (coord value +/- delta), this averaging gives zero net force.
562      */
563     for (gmx::index d = 0; d < biasForce.ssize(); d++)
564     {
565         /* Average of the current and new force */
566         biasForce[d] = 0.5 * (biasForce[d] + newForce[d]);
567     }
568 
569     return newPotential;
570 }
571 
572 namespace
573 {
574 
575 /*! \brief
576  * Sets the histogram rescaling factors needed to control the histogram size.
577  *
578  * For sake of robustness, the reference weight histogram can grow at a rate
579  * different from the actual sampling rate. Typically this happens for a limited
580  * initial time, alternatively growth is scaled down by a constant factor for all
581  * initial time; alternatively, growth is scaled down by a constant factor for all
582  * energy update this should be reflected also in the PMF. Thus the PMF histogram
583  * needs to be rescaled too.
584  *
585  * This function should only be called by the bias update function or wrapped by a function that
586  * knows what scale factors should be applied when, e.g,
587  * getSkippedUpdateHistogramScaleFactors().
588  *
589  * \param[in]  params             The bias parameters.
590  * \param[in]  newHistogramSize   New reference weight histogram size.
591  * \param[in]  oldHistogramSize   Previous reference weight histogram size (before adding new samples).
592  * \param[out] weightHistScaling  Scaling factor for the reference weight histogram.
593  * \param[out] logPmfSumScaling   Log of the scaling factor for the PMF histogram.
594  */
595 void setHistogramUpdateScaleFactors(const BiasParams& params,
596                                     double            newHistogramSize,
597                                     double            oldHistogramSize,
598                                     double*           weightHistScaling,
599                                     double*           logPmfSumScaling)
600 {
601 
602     /* The two scaling factors below are slightly different (ignoring the log factor) because the
603        reference and the PMF histogram apply weight scaling differently. The weight histogram
604        applies it locally, i.e. each sample is scaled down, meaning all samples get equal weight.
605        It is done this way because that is what target type local Boltzmann (for which
606        target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
607        by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
608        1) empirically this is necessary for converging the PMF; 2) since the extraction of
609        the PMF is theoretically only valid for a constant bias, new samples should get more
610        weight than old ones for which the bias is fluctuating more. */
611     *weightHistScaling =
612             newHistogramSize / (oldHistogramSize + params.updateWeight * params.localWeightScaling);
613     *logPmfSumScaling = std::log(newHistogramSize / (oldHistogramSize + params.updateWeight));
614 }
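/* Example of how the factors behave (hypothetical numbers): if the reference histogram
 * grows from 1000 to 1010 while updateWeight = 10 and localWeightScaling = 1, then
 * weightHistScaling = 1010/1010 = 1 and logPmfSumScaling = log(1) = 0 (no rescaling).
 * If instead the histogram size is kept constant at 1000 (as in the initial stage),
 * weightHistScaling = 1000/1010 < 1 and logPmfSumScaling = log(1000/1010) < 0,
 * i.e. old samples are scaled down relative to the newly added ones.
 */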
615 
616 } // namespace
617 
618 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams& params,
619                                                       double*           weightHistScaling,
620                                                       double*           logPmfSumScaling) const
621 {
622     GMX_ASSERT(params.skipUpdates(),
623                "Calling function for skipped updates when skipping updates is not allowed");
624 
625     if (inInitialStage())
626     {
627         /* In between global updates the reference histogram size is kept constant so we trivially
628            know what the histogram size was at the time of the skipped update. */
629         double histogramSize = histogramSize_.histogramSize();
630         setHistogramUpdateScaleFactors(params, histogramSize, histogramSize, weightHistScaling,
631                                        logPmfSumScaling);
632     }
633     else
634     {
635         /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
636         *weightHistScaling = 1;
637         *logPmfSumScaling  = 0;
638     }
639 }
640 
641 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams& params)
642 {
643     double weightHistScaling;
644     double logPmfsumScaling;
645 
646     getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
647 
648     for (auto& pointState : points_)
649     {
650         bool didUpdate = pointState.performPreviouslySkippedUpdates(
651                 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
652 
653         /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unnecessarily */
654         if (didUpdate)
655         {
656             pointState.updateBias();
657         }
658     }
659 }
660 
661 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams& params, const BiasGrid& grid)
662 {
663     double weightHistScaling;
664     double logPmfsumScaling;
665 
666     getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
667 
668     /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
669     const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
670     for (auto& neighbor : neighbors)
671     {
672         bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(
673                 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
674 
675         if (didUpdate)
676         {
677             points_[neighbor].updateBias();
678         }
679     }
680 }
681 
682 namespace
683 {
684 
685 /*! \brief
686  * Merge update lists from multiple sharing simulations.
687  *
688  * \param[in,out] updateList    Update list for this simulation (assumed >= npoints long).
689  * \param[in]     numPoints     Total number of points.
690  * \param[in]     commRecord    Struct for intra-simulation communication.
691  * \param[in]     multiSimComm  Struct for multi-simulation communication.
692  */
693 void mergeSharedUpdateLists(std::vector<int>*     updateList,
694                             int                   numPoints,
695                             const t_commrec*      commRecord,
696                             const gmx_multisim_t* multiSimComm)
697 {
698     std::vector<int> numUpdatesOfPoint;
699 
700     /* Flag the update points of this sim.
701        TODO: we can probably avoid allocating this array and just use the input array. */
702     numUpdatesOfPoint.resize(numPoints, 0);
703     for (auto& pointIndex : *updateList)
704     {
705         numUpdatesOfPoint[pointIndex] = 1;
706     }
707 
708     /* Sum over the sims to get all the flagged points */
709     sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
710 
711     /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
712     updateList->clear();
713     for (int m = 0; m < numPoints; m++)
714     {
715         if (numUpdatesOfPoint[m] > 0)
716         {
717             updateList->push_back(m);
718         }
719     }
720 }
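/* Small example of the merge (illustrative): if simulation A sampled points {2, 3}
 * and simulation B sampled points {3, 7}, the summed flag array is non-zero at
 * 2, 3 and 7, so every sharing simulation ends up with the same merged update
 * list {2, 3, 7}.
 */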
721 
722 /*! \brief
723  * Generate an update list of points sampled since the last update.
724  *
725  * \param[in] grid              The AWH bias.
726  * \param[in] points            The point state.
727  * \param[in] originUpdatelist  The origin of the rectangular region that has been sampled since
728  * last update. \param[in] endUpdatelist     The end of the rectangular region that has been sampled since
729  * last update. \param[in,out] updateList    Local update list to set (assumed >= npoints long).
730  */
731 void makeLocalUpdateList(const BiasGrid&                grid,
732                          const std::vector<PointState>& points,
733                          const awh_ivec                 originUpdatelist,
734                          const awh_ivec                 endUpdatelist,
735                          std::vector<int>*              updateList)
736 {
737     awh_ivec origin;
738     awh_ivec numPoints;
739 
740     /* Define the update search grid */
741     for (int d = 0; d < grid.numDimensions(); d++)
742     {
743         origin[d]    = originUpdatelist[d];
744         numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
745 
746         /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
747            This helps for calculating the distance/number of points but should be removed and fixed when the way of
748            updating origin/end updatelist is changed (see sampleProbabilityWeights). */
749         numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
750     }
751 
752     /* Make the update list */
753     updateList->clear();
754     int  pointIndex  = -1;
755     bool pointExists = true;
756     while (pointExists)
757     {
758         pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
759 
760         if (pointExists && points[pointIndex].inTargetRegion())
761         {
762             updateList->push_back(pointIndex);
763         }
764     }
765 }
766 
767 } // namespace
768 
769 void BiasState::resetLocalUpdateRange(const BiasGrid& grid)
770 {
771     const int gridpointIndex = coordState_.gridpointIndex();
772     for (int d = 0; d < grid.numDimensions(); d++)
773     {
774         /* This gives the  minimum range consisting only of the current closest point. */
775         originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
776         endUpdatelist_[d]    = grid.point(gridpointIndex).index[d];
777     }
778 }
779 
780 namespace
781 {
782 
783 /*! \brief
784  * Add partial histograms (accumulating between updates) to accumulating histograms.
785  *
786  * \param[in,out] pointState         The state of the points in the bias.
787  * \param[in,out] weightSumCovering  The weights for checking covering.
788  * \param[in]     numSharedUpdate    The number of biases sharing the histogram.
789  * \param[in]     commRecord         Struct for intra-simulation communication.
790  * \param[in]     multiSimComm       Struct for multi-simulation communication.
791  * \param[in]     localUpdateList    List of points with data.
792  */
793 void sumHistograms(gmx::ArrayRef<PointState> pointState,
794                    gmx::ArrayRef<double>     weightSumCovering,
795                    int                       numSharedUpdate,
796                    const t_commrec*          commRecord,
797                    const gmx_multisim_t*     multiSimComm,
798                    const std::vector<int>&   localUpdateList)
799 {
800     /* The covering checking histograms are added before summing over simulations, so that the
801        weights from different simulations are kept distinguishable. */
802     for (int globalIndex : localUpdateList)
803     {
804         weightSumCovering[globalIndex] += pointState[globalIndex].weightSumIteration();
805     }
806 
807     /* Sum histograms over multiple simulations if needed. */
808     if (numSharedUpdate > 1)
809     {
810         GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
811                    "Sharing within a simulation is not implemented (yet)");
812 
813         /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
814         std::vector<double> weightSum;
815         std::vector<double> coordVisits;
816 
817         weightSum.resize(localUpdateList.size());
818         coordVisits.resize(localUpdateList.size());
819 
820         for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
821         {
822             const PointState& ps = pointState[localUpdateList[localIndex]];
823 
824             weightSum[localIndex]   = ps.weightSumIteration();
825             coordVisits[localIndex] = ps.numVisitsIteration();
826         }
827 
828         sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
829         sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
830 
831         /* Transfer back the result */
832         for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
833         {
834             PointState& ps = pointState[localUpdateList[localIndex]];
835 
836             ps.setPartialWeightAndCount(weightSum[localIndex], coordVisits[localIndex]);
837         }
838     }
839 
840     /* Now add the partial counts and weights to the accumulating histograms.
841        Note: we still need to use the weights for the update so we postpone
842        resetting them until the end of the update. */
843     for (int globalIndex : localUpdateList)
844     {
845         pointState[globalIndex].addPartialWeightAndCount();
846     }
847 }
848 
849 /*! \brief
850  * Label points along an axis as covered or not.
851  *
852  * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
853  *
854  * \param[in]     visited        Visited? For each point.
855  * \param[in]     checkCovering  Check for covering? For each point.
856  * \param[in]     numPoints      The number of grid points along this dimension.
857  * \param[in]     period         Period in number of points.
858  * \param[in]     coverRadius    Cover radius, in points, needed for defining a point as covered.
859  * \param[in,out] covered        In this array elements are 1 for covered points and 0 for
860  * non-covered points, this routine assumes that \p covered has at least size \p numPoints.
861  */
862 void labelCoveredPoints(const std::vector<bool>& visited,
863                         const std::vector<bool>& checkCovering,
864                         int                      numPoints,
865                         int                      period,
866                         int                      coverRadius,
867                         gmx::ArrayRef<int>       covered)
868 {
869     GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
870 
871     bool haveFirstNotVisited = false;
872     int  firstNotVisited     = -1;
873     int  notVisitedLow       = -1;
874     int  notVisitedHigh      = -1;
875 
876     for (int n = 0; n < numPoints; n++)
877     {
878         if (checkCovering[n] && !visited[n])
879         {
880             if (!haveFirstNotVisited)
881             {
882                 notVisitedLow       = n;
883                 firstNotVisited     = n;
884                 haveFirstNotVisited = true;
885             }
886             else
887             {
888                 notVisitedHigh = n;
889 
890                 /* We now have an interval I = [notVisitedLow,notVisitedHigh] whose end points are
891                    unvisited and whose interior points are visited. The unvisited end points affect
892                    the coveredness of the visited points with a reach equal to the cover radius. */
893                 int notCoveredLow  = notVisitedLow + coverRadius;
894                 int notCoveredHigh = notVisitedHigh - coverRadius;
895                 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
896                 {
897                     covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
898                 }
899 
900                 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval
901                    the notVisitedLow of the next. */
902                 notVisitedLow = notVisitedHigh;
903             }
904         }
905     }
906 
907     /* Have labelled all the internal points. Now take care of the boundary regions. */
908     if (!haveFirstNotVisited)
909     {
910         /* No non-visited points <=> all points visited => all points covered. */
911 
912         for (int n = 0; n < numPoints; n++)
913         {
914             covered[n] = 1;
915         }
916     }
917     else
918     {
919         int lastNotVisited = notVisitedLow;
920 
921         /* For periodic boundaries, non-visited points can influence points
922            on the other side of the boundary so we need to wrap around. */
923 
924         /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
925            For non-periodic boundaries there is no lower end point so a dummy value is used. */
926         int notVisitedHigh = firstNotVisited;
927         int notVisitedLow  = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
928 
929         int notCoveredLow  = notVisitedLow + coverRadius;
930         int notCoveredHigh = notVisitedHigh - coverRadius;
931 
932         for (int i = 0; i <= notVisitedHigh; i++)
933         {
934             /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
935             covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
936         }
937 
938         /* Upper end. Same as for lower end but in the other direction. */
939         notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
940         notVisitedLow  = lastNotVisited;
941 
942         notCoveredLow  = notVisitedLow + coverRadius;
943         notCoveredHigh = notVisitedHigh - coverRadius;
944 
945         for (int i = notVisitedLow; i <= numPoints - 1; i++)
946         {
947             /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
948             covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
949         }
950     }
951 }
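/* Example of the labelling (illustrative): for a non-periodic axis with numPoints = 8,
 * coverRadius = 1 and only point 2 not visited, the result is
 *   covered = { 1, 0, 0, 0, 1, 1, 1, 1 },
 * i.e. the unvisited point removes covering from itself and from its neighbors within
 * the cover radius, while the rest of the axis stays covered.
 */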
952 
953 } // namespace
954 
955 bool BiasState::isSamplingRegionCovered(const BiasParams&             params,
956                                         const std::vector<DimParams>& dimParams,
957                                         const BiasGrid&               grid,
958                                         const t_commrec*              commRecord,
959                                         const gmx_multisim_t*         multiSimComm) const
960 {
961     /* Allocate and initialize arrays: one for checking visits along each dimension,
962        one for keeping track of which points to check and one for the covered points.
963        Possibly these could be kept as AWH variables to avoid these allocations. */
964     struct CheckDim
965     {
966         std::vector<bool> visited;
967         std::vector<bool> checkCovering;
968         // We use int for the covering array since we might use gmx_sumi_sim.
969         std::vector<int> covered;
970     };
971 
972     std::vector<CheckDim> checkDim;
973     checkDim.resize(grid.numDimensions());
974 
975     for (int d = 0; d < grid.numDimensions(); d++)
976     {
977         const size_t numPoints = grid.axis(d).numPoints();
978         checkDim[d].visited.resize(numPoints, false);
979         checkDim[d].checkCovering.resize(numPoints, false);
980         checkDim[d].covered.resize(numPoints, 0);
981     }
982 
983     /* Set visited points along each dimension and which points should be checked for covering.
984        Specifically, points above the free energy cutoff (if there is one) or points outside
985        of the target region are ignored. */
986 
987     /* Set the free energy cutoff */
988     double maxFreeEnergy = GMX_FLOAT_MAX;
989 
990     if (params.eTarget == eawhtargetCUTOFF)
991     {
992         maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
993     }
994 
995     /* Set the threshold weight for a point to be considered visited. */
996     double weightThreshold = 1;
997     for (int d = 0; d < grid.numDimensions(); d++)
998     {
999         if (grid.axis(d).isFepLambdaAxis())
1000         {
1001             /* Do not modify the weight threshold based on a FEP lambda axis. The spread
1002              * of the sampling weights does not depend on a Gaussian distribution (like
1003              * below). */
1004             weightThreshold *= 1.0;
1005         }
1006         else
1007         {
1008             /* The spacing is proportional to 1/sqrt(betak). The weight threshold will be
1009              * approximately (given that the spacing can be modified if the dimension is periodic)
1010              * proportional to sqrt(1/(2*pi)). */
1011             weightThreshold *= grid.axis(d).spacing()
1012                                * std::sqrt(dimParams[d].pullDimParams().betak * 0.5 * M_1_PI);
1013         }
1014     }
1015 
1016     /* Project the sampling weights onto each dimension */
1017     for (size_t m = 0; m < grid.numPoints(); m++)
1018     {
1019         const PointState& pointState = points_[m];
1020 
1021         for (int d = 0; d < grid.numDimensions(); d++)
1022         {
1023             int n = grid.point(m).index[d];
1024 
1025             /* Is visited if it was already visited or if there is enough weight at the current point */
1026             checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
1027 
1028             /* Check for covering if there is at least one point in this slice that is in the target region and within the cutoff */
1029             checkDim[d].checkCovering[n] =
1030                     checkDim[d].checkCovering[n]
1031                     || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
1032         }
1033     }
1034 
1035     /* Label each point along each dimension as covered or not. */
1036     for (int d = 0; d < grid.numDimensions(); d++)
1037     {
1038         labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(),
1039                            grid.axis(d).numPointsInPeriod(), params.coverRadius()[d], checkDim[d].covered);
1040     }
1041 
1042     /* Now check for global covering. Each dimension needs to be covered separately.
1043        A dimension is covered if each point is covered.  Multiple simulations collectively
1044        cover the points, i.e. a point is covered if any of the simulations covered it.
1045        However, visited points are not shared, i.e. whether a point is covered is
1046        determined by the visits of a single simulation. In general the covering criterion is
1047        all points covered => all points are surrounded by visited points up to a radius = coverRadius.
1048        For 1 simulation, all points covered <=> all points visited. For multiple simulations
1049        however, all points visited collectively !=> all points covered, except for coverRadius = 0.
1050        In the limit of large coverRadius, all points covered => all points visited by at least one
1051        simulation (since no point will be covered until all points have been visited by a
1052        single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
1053        needs with surrounding points before sharing covering information with other simulations. */
1054 
1055     /* Communicate the covered points between sharing simulations if needed. */
1056     if (params.numSharedUpdate > 1)
1057     {
1058         /* For multiple dimensions this may not be the best way to do it. */
1059         for (int d = 0; d < grid.numDimensions(); d++)
1060         {
1061             sumOverSimulations(
1062                     gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()),
1063                     commRecord, multiSimComm);
1064         }
1065     }
1066 
1067     /* Now check if for each dimension all points are covered. Break if not true. */
1068     bool allPointsCovered = true;
1069     for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
1070     {
1071         for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
1072         {
1073             allPointsCovered = (checkDim[d].covered[n] != 0);
1074         }
1075     }
1076 
1077     return allPointsCovered;
1078 }
1079 
1080 /*! \brief
1081  * Normalizes the free energy and PMF sum.
1082  *
1083  * \param[in] pointState  The state of the points.
1084  */
1085 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState>* pointState)
1086 {
1087     double minF = freeEnergyMinimumValue(*pointState);
1088 
1089     for (PointState& ps : *pointState)
1090     {
1091         ps.normalizeFreeEnergyAndPmfSum(minF);
1092     }
1093 }
1094 
1095 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams>& dimParams,
1096                                                          const BiasGrid&               grid,
1097                                                          const BiasParams&             params,
1098                                                          const t_commrec*              commRecord,
1099                                                          const gmx_multisim_t*         multiSimComm,
1100                                                          double                        t,
1101                                                          int64_t                       step,
1102                                                          FILE*                         fplog,
1103                                                          std::vector<int>*             updateList)
1104 {
1105     /* Note that updateList is only used in this scope and is always
1106      * re-initialized. We do not use a local vector, because that would
1107      * cause reallocation every time this function is called and the vector
1108      * can be the size of the whole grid.
1109      */
1110 
1111     /* Make a list of all local points, i.e. those that could have been touched since
1112        the last update. These are the points needed for summing histograms below
1113        (non-local points only add zeros). For local updates, this will also be the
1114        final update list. */
1115     makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_, updateList);
1116     if (params.numSharedUpdate > 1)
1117     {
1118         mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1119     }
1120 
1121     /* Reset the range for the next update */
1122     resetLocalUpdateRange(grid);
1123 
1124     /* Add samples to histograms for all local points and sync simulations if needed */
1125     sumHistograms(points_, weightSumCovering_, params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1126 
1127     sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1128 
1129     /* Renormalize the free energy if values are too large. */
1130     bool needToNormalizeFreeEnergy = false;
1131     for (int& globalIndex : *updateList)
1132     {
1133         /* We want to keep the absolute value of the free energies less than
1134            c_largePositiveExponent to be able to safely pass these values to exp(). The check below
1135            ensures this as long as the free energy values grow less than 0.5*c_largePositiveExponent
1136            in a return time to this neighborhood. For reasonable update sizes it's unlikely that
1137            this requirement would be broken. */
1138         if (std::abs(points_[globalIndex].freeEnergy()) > 0.5 * detail::c_largePositiveExponent)
1139         {
1140             needToNormalizeFreeEnergy = true;
1141             break;
1142         }
1143     }
1144 
1145     /* Update target distribution? */
1146     bool needToUpdateTargetDistribution =
1147             (params.eTarget != eawhtargetCONSTANT && params.isUpdateTargetStep(step));
1148 
1149     /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1150     bool detectedCovering = false;
1151     if (inInitialStage())
1152     {
1153         detectedCovering =
1154                 (params.isCheckCoveringStep(step)
1155                  && isSamplingRegionCovered(params, dimParams, grid, commRecord, multiSimComm));
1156     }
1157 
1158     /* The weight histogram size after this update. */
1159     double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_,
1160                                                               weightSumCovering_, fplog);
1161 
1162     /* Make the update list. Usually we try to only update local points,
1163      * but if the update has non-trivial or non-deterministic effects
1164      * on non-local points a global update is needed. This is the case when:
1165      * 1) a covering occurred in the initial stage, leading to non-trivial
1166      *    histogram rescaling factors; or
1167      * 2) the target distribution will be updated, since we don't make any
1168      *    assumption on its form; or
1169      * 3) the AWH parameters are such that we never attempt to skip non-local
1170      *    updates; or
1171      * 4) the free energy values have grown so large that a renormalization
1172      *    is needed.
1173      */
1174     if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1175     {
1176         /* Global update, just add all points. */
1177         updateList->clear();
1178         for (size_t m = 0; m < points_.size(); m++)
1179         {
1180             if (points_[m].inTargetRegion())
1181             {
1182                 updateList->push_back(m);
1183             }
1184         }
1185     }
1186 
1187     /* Set histogram scale factors. */
1188     double weightHistScalingSkipped = 0;
1189     double logPmfsumScalingSkipped  = 0;
1190     if (params.skipUpdates())
1191     {
1192         getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1193     }
1194     double weightHistScalingNew;
1195     double logPmfsumScalingNew;
1196     setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1197                                    &weightHistScalingNew, &logPmfsumScalingNew);
1198 
1199     /* Update free energy and reference weight histogram for points in the update list. */
1200     for (int pointIndex : *updateList)
1201     {
1202         PointState* pointStateToUpdate = &points_[pointIndex];
1203 
1204         /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1205         if (params.skipUpdates())
1206         {
1207             pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(),
1208                                                                 weightHistScalingSkipped,
1209                                                                 logPmfsumScalingSkipped);
1210         }
1211 
1212         /* Now do an update with new sampling data. */
1213         pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(),
1214                                                   weightHistScalingNew, logPmfsumScalingNew);
1215     }
1216 
1217     /* Only update the histogram size after we are done with the local point updates */
1218     histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1219 
1220     if (needToNormalizeFreeEnergy)
1221     {
1222         normalizeFreeEnergyAndPmfSum(&points_);
1223     }
1224 
1225     if (needToUpdateTargetDistribution)
1226     {
1227         /* The target distribution is always updated for all points at once. */
1228         updateTargetDistribution(points_, params);
1229     }
1230 
1231     /* Update the bias. The bias is updated separately and last since it is simply a function of
1232        the free energy and the target distribution, and we want to avoid doing extra work. */
1233     for (int pointIndex : *updateList)
1234     {
1235         points_[pointIndex].updateBias();
1236     }
1237 
1238     /* Increase the update counter. */
1239     histogramSize_.incrementNumUpdates();
1240 }
1241 
1242 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams>& dimParams,
1243                                                            const BiasGrid&               grid,
1244                                                            gmx::ArrayRef<const double> neighborLambdaEnergies,
1245                                                            std::vector<double, AlignedAllocator<double>>* weight) const
1246 {
1247     /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1248     const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1249 
1250 #if GMX_SIMD_HAVE_DOUBLE
1251     typedef SimdDouble PackType;
1252     constexpr int      packSize = GMX_SIMD_DOUBLE_WIDTH;
1253 #else
1254     typedef double PackType;
1255     constexpr int  packSize = 1;
1256 #endif
1257     /* Round the size of the weight array up to packSize */
1258     const int weightSize = ((neighbors.size() + packSize - 1) / packSize) * packSize;
1259     weight->resize(weightSize);
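    /* Illustrative example (the actual pack width depends on the SIMD build): with
     * GMX_SIMD_DOUBLE_WIDTH == 4 and 10 neighbors, weightSize = ((10 + 4 - 1)/4)*4 = 12, so
     * entries 10 and 11 are padding. They are set to c_largeNegativeExponent below, which
     * exp() turns into ~0, so the padded lanes do not change weightSumPack. */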
1260 
1261     double* gmx_restrict weightData = weight->data();
1262     PackType             weightSumPack(0.0);
1263     for (size_t i = 0; i < neighbors.size(); i += packSize)
1264     {
1265         for (size_t n = i; n < i + packSize; n++)
1266         {
1267             if (n < neighbors.size())
1268             {
1269                 const int neighbor = neighbors[n];
1270                 (*weight)[n]       = biasedLogWeightFromPoint(
1271                         dimParams, points_, grid, neighbor, points_[neighbor].bias(),
1272                         coordState_.coordValue(), neighborLambdaEnergies, coordState_.gridpointIndex());
1273             }
1274             else
1275             {
1276                 /* Pad with values that don't affect the result */
1277                 (*weight)[n] = detail::c_largeNegativeExponent;
1278             }
1279         }
1280         PackType weightPack = load<PackType>(weightData + i);
1281         weightPack          = gmx::exp(weightPack);
1282         weightSumPack       = weightSumPack + weightPack;
1283         store(weightData + i, weightPack);
1284     }
1285     /* Sum of probability weights */
1286     double weightSum = reduce(weightSumPack);
1287     GMX_RELEASE_ASSERT(weightSum > 0,
1288                        "zero probability weight when updating AWH probability weights.");
1289 
1290     /* Normalize probabilities to sum to 1 */
1291     double invWeightSum = 1 / weightSum;
1292 
1293     /* When there is a free energy lambda state axis remove the convolved contributions along that
1294      * axis from the total bias. This must be done after calculating invWeightSum (since weightSum
1295      * will be modified), but before normalizing the weights (below). */
1296     if (grid.hasLambdaAxis())
1297     {
1298         /* If there is only one axis the bias will not be convolved in any dimension. */
1299         if (grid.axis().size() == 1)
1300         {
1301             weightSum = gmx::exp(points_[coordState_.gridpointIndex()].bias());
1302         }
1303         else
1304         {
1305             for (size_t i = 0; i < neighbors.size(); i++)
1306             {
1307                 const int neighbor = neighbors[i];
1308                 if (pointsHaveDifferentLambda(grid, coordState_.gridpointIndex(), neighbor))
1309                 {
1310                     weightSum -= weightData[i];
1311                 }
1312             }
1313             /* Subtracting potentially very large values above may lead to rounding errors, causing
1314              * negative weightSum, even in double precision. */
1315             weightSum = std::max(weightSum, GMX_DOUBLE_MIN);
1316         }
1317     }
1318 
1319     for (double& w : *weight)
1320     {
1321         w *= invWeightSum;
1322     }
1323 
1324     /* Return the convolved bias */
1325     return std::log(weightSum);
1326 }
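/* A scalar sketch of what updateProbabilityWeightsAndConvolvedBias() computes, assuming for
 * brevity that there is no free-energy lambda axis:
 *
 *   w_n       = exp(biasedLogWeightFromPoint(..., n, ...))  for each neighbor n
 *   weightSum = sum_n w_n
 *   weight[n] = w_n / weightSum       (normalized probability weights returned via *weight)
 *   return value = log(weightSum)     (the convolved bias at the current coordinate value)
 *
 * With a lambda axis present, the contributions w_n of neighbors that have a different lambda
 * value are subtracted from weightSum before the log is taken (no convolution along lambda),
 * while the weights are still normalized with the full sum.
 */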
1327 
1328 double BiasState::calcConvolvedBias(const std::vector<DimParams>& dimParams,
1329                                     const BiasGrid&               grid,
1330                                     const awh_dvec&               coordValue) const
1331 {
1332     int              point     = grid.nearestIndex(coordValue);
1333     const GridPoint& gridPoint = grid.point(point);
1334 
1335     /* Sum the probability weights from the neighborhood of the given point */
1336     double weightSum = 0;
1337     for (int neighbor : gridPoint.neighbor)
1338     {
1339         /* No convolution is required along the lambda dimension. */
1340         if (pointsHaveDifferentLambda(grid, point, neighbor))
1341         {
1342             continue;
1343         }
1344         double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
1345                                                     points_[neighbor].bias(), coordValue, {}, point);
1346         weightSum += std::exp(logWeight);
1347     }
1348 
1349     /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1350     return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1351 }
1352 
1353 void BiasState::sampleProbabilityWeights(const BiasGrid& grid, gmx::ArrayRef<const double> probWeightNeighbor)
1354 {
1355     const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1356 
1357     /* Save weights for next update */
1358     for (size_t n = 0; n < neighbor.size(); n++)
1359     {
1360         points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1361     }
1362 
1363     /* Update the local update range. Two corner points define this rectangular
1364      * domain. We need to choose two new corner points such that the new domain
1365      * contains both the old update range and the current neighborhood.
1366      * In the simplest case when an update is performed every sample,
1367      * the update range would simply equal the current neighborhood.
1368      */
1369     int neighborStart = neighbor[0];
1370     int neighborLast  = neighbor[neighbor.size() - 1];
1371     for (int d = 0; d < grid.numDimensions(); d++)
1372     {
1373         int origin = grid.point(neighborStart).index[d];
1374         int last   = grid.point(neighborLast).index[d];
1375 
1376         if (origin > last)
1377         {
1378             /* Unwrap if wrapped around the boundary (only happens for periodic
1379              * boundaries). This has already been done for the stored index interval.
1380              */
1381             /* TODO: What we want is the smallest update interval that
1382              * contains all points that need to be updated. This amounts
1383              * to combining two intervals, the current [origin, end] update
1384              * interval and the newly touched neighborhood, into a new
1385              * interval that contains all points from both of the old
1386              * intervals.
1387              *
1388              * For periodic boundaries it becomes slightly more complicated
1389              * than for closed boundaries because then it need not be true
1390              * that origin < end (so one cannot simply compare origin/end
1391              * in the min()/max() below). The strategy here is to choose the
1392              * origin closest to a reference point (index 0) and then unwrap
1393              * the end index if needed and choose the largest end index.
1394              * This ensures that both intervals are contained in the new
1395              * interval, but the result is not necessarily the smallest such
1396              * interval. Currently we handle this by going through each
1397              * possibility and checking it.
1398              */
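            /* Example (hypothetical numbers): for a periodic axis with
             * numPointsInPeriod() == 60, a neighborhood wrapping the boundary can give
             * origin == 58 and last == 3; the unwrapping below turns last into 63, so the
             * stored interval [58, 63] covers grid points 58, 59, 0, 1, 2 and 3. */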
1399             last += grid.axis(d).numPointsInPeriod();
1400         }
1401 
1402         originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1403         endUpdatelist_[d]    = std::max(endUpdatelist_[d], last);
1404     }
1405 }
1406 
1407 void BiasState::sampleCoordAndPmf(const std::vector<DimParams>& dimParams,
1408                                   const BiasGrid&               grid,
1409                                   gmx::ArrayRef<const double>   probWeightNeighbor,
1410                                   double                        convolvedBias)
1411 {
1412     /* Sampling-based deconvolution extracting the PMF.
1413      * Update the PMF histogram with the current coordinate value.
1414      *
1415      * Because of the finite width of the harmonic potential, the free energy
1416      * defined for each coordinate point does not exactly equal that of the
1417      * actual coordinate, the PMF. However, the PMF can be estimated by applying
1418      * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1419      * reweighted histogram of the coordinate value. Strictly, this relies on
1420      * the unknown normalization constant Z being either constant or known. Here,
1421      * neither is true except in the long simulation time limit. Empirically however,
1422      * it works (mainly because of how the PMF histogram is rescaled).
1423      */
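    /* Equivalently, in log form (a restatement of the relation above, treating Z as an unknown,
     * approximately constant factor):
     *
     *   PMF(x) ~= bias_convolved(x) - log(P_biased(x)) + log(Z)
     */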
1424 
1425     const int                gridPointIndex  = coordState_.gridpointIndex();
1426     const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
1427 
1428     /* Update the PMF of points along a lambda axis with their bias. */
1429     if (lambdaAxisIndex)
1430     {
1431         const std::vector<int>& neighbors = grid.point(gridPointIndex).neighbor;
1432 
1433         std::vector<double> lambdaMarginalDistribution =
1434                 calculateFELambdaMarginalDistribution(grid, neighbors, probWeightNeighbor);
1435 
1436         awh_dvec coordValueAlongLambda = { coordState_.coordValue()[0], coordState_.coordValue()[1],
1437                                            coordState_.coordValue()[2], coordState_.coordValue()[3] };
1438         for (size_t i = 0; i < neighbors.size(); i++)
1439         {
1440             const int neighbor = neighbors[i];
1441             double    bias;
1442             if (pointsAlongLambdaAxis(grid, gridPointIndex, neighbor))
1443             {
1444                 const double neighborLambda = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
1445                 if (neighbor == gridPointIndex)
1446                 {
1447                     bias = convolvedBias;
1448                 }
1449                 else
1450                 {
1451                     coordValueAlongLambda[lambdaAxisIndex.value()] = neighborLambda;
1452                     bias = calcConvolvedBias(dimParams, grid, coordValueAlongLambda);
1453                 }
1454 
1455                 const double probWeight = lambdaMarginalDistribution[neighborLambda];
1456                 const double weightedBias = bias - std::log(std::max(probWeight, GMX_DOUBLE_MIN)); // avoid log(0)
1457 
1458                 if (neighbor == gridPointIndex && grid.covers(coordState_.coordValue()))
1459                 {
1460                     points_[neighbor].samplePmf(weightedBias);
1461                 }
1462                 else
1463                 {
1464                     points_[neighbor].updatePmfUnvisited(weightedBias);
1465                 }
1466             }
1467         }
1468     }
1469     else
1470     {
1471         /* Only save coordinate data that is in range (the given index is always
1472          * in range even if the coordinate value is not).
1473          */
1474         if (grid.covers(coordState_.coordValue()))
1475         {
1476             /* Save PMF sum and keep a histogram of the sampled coordinate values */
1477             points_[gridPointIndex].samplePmf(convolvedBias);
1478         }
1479     }
1480 
1481     /* Save probability weights for the update */
1482     sampleProbabilityWeights(grid, probWeightNeighbor);
1483 }
1484 
1485 void BiasState::initHistoryFromState(AwhBiasHistory* biasHistory) const
1486 {
1487     biasHistory->pointState.resize(points_.size());
1488 }
1489 
1490 void BiasState::updateHistory(AwhBiasHistory* biasHistory, const BiasGrid& grid) const
1491 {
1492     GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(),
1493                        "The AWH history setup does not match the AWH state.");
1494 
1495     AwhBiasStateHistory* stateHistory = &biasHistory->state;
1496     stateHistory->umbrellaGridpoint   = coordState_.umbrellaGridpoint();
1497 
1498     for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1499     {
1500         AwhPointStateHistory* psh = &biasHistory->pointState[m];
1501 
1502         points_[m].storeState(psh);
1503 
1504         psh->weightsum_covering = weightSumCovering_[m];
1505     }
1506 
1507     histogramSize_.storeState(stateHistory);
1508 
1509     stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid, originUpdatelist_);
1510     stateHistory->end_index_updatelist    = multiDimGridIndexToLinear(grid, endUpdatelist_);
1511 }
1512 
1513 void BiasState::restoreFromHistory(const AwhBiasHistory& biasHistory, const BiasGrid& grid)
1514 {
1515     const AwhBiasStateHistory& stateHistory = biasHistory.state;
1516 
1517     coordState_.restoreFromHistory(stateHistory);
1518 
1519     if (biasHistory.pointState.size() != points_.size())
1520     {
1521         GMX_THROW(
1522                 InvalidInputError("Bias grid size in checkpoint and simulation do not match. "
1523                                   "Likely you provided a checkpoint from a different simulation."));
1524     }
1525     for (size_t m = 0; m < points_.size(); m++)
1526     {
1527         points_[m].setFromHistory(biasHistory.pointState[m]);
1528     }
1529 
1530     for (size_t m = 0; m < weightSumCovering_.size(); m++)
1531     {
1532         weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1533     }
1534 
1535     histogramSize_.restoreFromHistory(stateHistory);
1536 
1537     linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1538     linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1539 }
1540 
1541 void BiasState::broadcast(const t_commrec* commRecord)
1542 {
1543     gmx_bcast(sizeof(coordState_), &coordState_, commRecord->mpi_comm_mygroup);
1544 
1545     gmx_bcast(points_.size() * sizeof(PointState), points_.data(), commRecord->mpi_comm_mygroup);
1546 
1547     gmx_bcast(weightSumCovering_.size() * sizeof(double), weightSumCovering_.data(),
1548               commRecord->mpi_comm_mygroup);
1549 
1550     gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord->mpi_comm_mygroup);
1551 }
1552 
1553 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams>& dimParams, const BiasGrid& grid)
1554 {
1555     std::vector<float> convolvedPmf;
1556 
1557     calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1558 
1559     for (size_t m = 0; m < points_.size(); m++)
1560     {
1561         points_[m].setFreeEnergy(convolvedPmf[m]);
1562     }
1563 }
1564 
1565 /*! \brief
1566  * Count trailing data rows containing only zeros.
1567  *
1568  * \param[in] data        2D data array.
1569  * \param[in] numRows     Number of rows in array.
1570  * \param[in] numColumns  Number of cols in array.
1571  * \returns the number of trailing zero rows.
1572  */
1573 static int countTrailingZeroRows(const double* const* data, int numRows, int numColumns)
1574 {
1575     int numZeroRows = 0;
1576     for (int m = numRows - 1; m >= 0; m--)
1577     {
1578         bool rowIsZero = true;
1579         for (int d = 0; d < numColumns; d++)
1580         {
1581             if (data[d][m] != 0)
1582             {
1583                 rowIsZero = false;
1584                 break;
1585             }
1586         }
1587 
1588         if (!rowIsZero)
1589         {
1590             /* At a row with non-zero data */
1591             break;
1592         }
1593         else
1594         {
1595             /* Still at a zero data row, keep checking rows higher up. */
1596             numZeroRows++;
1597         }
1598     }
1599 
1600     return numZeroRows;
1601 }
1602 
1603 /*! \brief
1604  * Initializes the PMF and target with data read from an input table.
1605  *
1606  * \param[in]     dimParams   The dimension parameters.
1607  * \param[in]     grid        The grid.
1608  * \param[in]     filename    The filename to read PMF and target from.
1609  * \param[in]     numBias     Number of biases.
1610  * \param[in]     biasIndex   The index of the bias.
1611  * \param[in,out] pointState  The state of the points in this bias.
1612  */
1613 static void readUserPmfAndTargetDistribution(const std::vector<DimParams>& dimParams,
1614                                              const BiasGrid&               grid,
1615                                              const std::string&            filename,
1616                                              int                           numBias,
1617                                              int                           biasIndex,
1618                                              std::vector<PointState>*      pointState)
1619 {
1620     /* Read the PMF and target distribution.
1621        From the PMF, the convolved PMF (which serves as the reference free energy values) can be
1622        calculated based on the force constant. The free energy and target together determine the bias.
1623      */
1624     std::string filenameModified(filename);
1625     if (numBias > 1)
1626     {
1627         size_t n = filenameModified.rfind('.');
1628         GMX_RELEASE_ASSERT(n != std::string::npos,
1629                            "The filename should contain an extension starting with .");
1630         filenameModified.insert(n, formatString("%d", biasIndex));
1631     }
1632 
1633     std::string correctFormatMessage = formatString(
1634             "%s is expected in the following format. "
1635             "The first ndim column(s) should contain the coordinate values for each point, "
1636             "each column containing values of one dimension (in ascending order). "
1637             "For a multidimensional coordinate, points should be listed "
1638             "in the order obtained by traversing lower dimensions first. "
1639             "E.g. for two-dimensional grid of size nxn: "
1640             "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1641             "Column ndim +  1 should contain the PMF value for each coordinate value. "
1642             "The target distribution values should be in column ndim + 2  or column ndim + 5. "
1643             "Make sure the input file ends with a new line but has no trailing new lines.",
1644             filename.c_str());
1645     gmx::TextLineWrapper wrapper;
1646     wrapper.settings().setLineLength(c_linewidth);
1647     correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
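    /* For illustration, a minimal (hypothetical) input for a one-dimensional bias in the
     * {coords, PMF, target} format described above could look like:
     *
     *   0.00   12.5   1.0
     *   0.05   11.8   1.0
     *   0.10   10.9   1.0
     *   ...
     *
     * with one data row per grid point and coordinate values in ascending order. */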
1648 
1649     double** data;
1650     int      numColumns;
1651     int      numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1652 
1653     /* Check basic data properties here. BiasGrid takes care of more complicated things. */
1654 
1655     if (numRows <= 0)
1656     {
1657         std::string mesg = gmx::formatString("%s is empty!\n\n%s", filename.c_str(),
1658                                              correctFormatMessage.c_str());
1659         GMX_THROW(InvalidInputError(mesg));
1660     }
1661 
1662     /* Less than 2 points is not useful for PMF or target. */
1663     if (numRows < 2)
1664     {
1665         std::string mesg = gmx::formatString(
1666                 "%s contains too few data points (%d)."
1667                 "The minimum number of points is 2.",
1668                 filename.c_str(), numRows);
1669         GMX_THROW(InvalidInputError(mesg));
1670     }
1671 
1672     /* Make sure there are enough columns of data.
1673 
1674        Two formats are allowed. Either with columns  {coords, PMF, target} or
1675        {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1676        is how AWH output is written (x, y, z being other AWH variables). For this format,
1677        trailing columns are ignored.
1678      */
1679     int columnIndexTarget;
1680     int numColumnsMin  = dimParams.size() + 2;
1681     int columnIndexPmf = dimParams.size();
1682     if (numColumns == numColumnsMin)
1683     {
1684         columnIndexTarget = columnIndexPmf + 1;
1685     }
1686     else
1687     {
1688         columnIndexTarget = columnIndexPmf + 4;
1689     }
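    /* Worked example: for a two-dimensional bias (dimParams.size() == 2), numColumnsMin == 4 and
     * columnIndexPmf == 2 (the third column, counting from zero). With the minimal format the
     * target is then read from column index 3, with the AWH-output-like format from column
     * index 6, matching "ndim + 2" and "ndim + 5" in the one-based description above. */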
1690 
1691     if (numColumns < numColumnsMin)
1692     {
1693         std::string mesg = gmx::formatString(
1694                 "The number of columns in %s should be at least %d."
1695                 "\n\n%s",
1696                 filename.c_str(), numColumnsMin, correctFormatMessage.c_str());
1697         GMX_THROW(InvalidInputError(mesg));
1698     }
1699 
1700     /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1701        since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1702     int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1703     if (numZeroRows > 1)
1704     {
1705         std::string mesg = gmx::formatString(
1706                 "Found %d trailing zero data rows in %s. Please remove trailing empty lines and "
1707                 "try again.",
1708                 numZeroRows, filename.c_str());
1709         GMX_THROW(InvalidInputError(mesg));
1710     }
1711 
1712     /* Convert from user units to internal units before passing the data on to the grid. */
1713     for (size_t d = 0; d < dimParams.size(); d++)
1714     {
1715         double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1716         if (scalingFactor == 1)
1717         {
1718             continue;
1719         }
1720         for (size_t m = 0; m < pointState->size(); m++)
1721         {
1722             data[d][m] *= scalingFactor;
1723         }
1724     }
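    /* Example (the unit convention is an assumption, for illustration only): for an angular
     * dimension that the user specifies in degrees but that is handled in radians internally,
     * scaleUserInputToInternal(1) would return pi/180 ~= 0.01745 and each coordinate value in
     * that column gets multiplied by it; for dimensions already in internal units the factor
     * is 1 and the column is left untouched. */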
1725 
1726     /* Get a data point for each AWH grid point so that they all get data. */
1727     std::vector<int> gridIndexToDataIndex(grid.numPoints());
1728     mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1729 
1730     /* Extract the data for each grid point.
1731      * We check if the target distribution is zero for all points.
1732      */
1733     bool targetDistributionIsZero = true;
1734     for (size_t m = 0; m < pointState->size(); m++)
1735     {
1736         (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1737         double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1738 
1739         /* Check if the values are allowed. */
1740         if (target < 0)
1741         {
1742             std::string mesg = gmx::formatString(
1743                     "Target distribution weight at point %zu (%g) in %s is negative.", m, target,
1744                     filename.c_str());
1745             GMX_THROW(InvalidInputError(mesg));
1746         }
1747         if (target > 0)
1748         {
1749             targetDistributionIsZero = false;
1750         }
1751         (*pointState)[m].setTargetConstantWeight(target);
1752     }
1753 
1754     if (targetDistributionIsZero)
1755     {
1756         std::string mesg =
1757                 gmx::formatString("The target weights given in column %d in %s are all 0",
1758                                   columnIndexTarget, filename.c_str());
1759         GMX_THROW(InvalidInputError(mesg));
1760     }
1761 
1762     /* Free the arrays. */
1763     for (int m = 0; m < numColumns; m++)
1764     {
1765         sfree(data[m]);
1766     }
1767     sfree(data);
1768 }
1769 
1770 void BiasState::normalizePmf(int numSharingSims)
1771 {
1772     /* The normalization of the PMF estimate matters because it determines how big an effect the next sample has.
1773        Approximately (for a large enough force constant) we should have:
1774        sum_x exp(-pmf(x)) = nsamples * sum_xref exp(-f(xref)).
1775      */
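    /* Concretely, the code below computes
     *   logRenorm = log( numSamples * sum_x exp(-f(x)) / sum_x exp(logPmfSum(x)) )
     * and adds it to logPmfSum at every point in the target region, after which
     *   sum_x exp(logPmfSum(x)) = numSamples * sum_x exp(-f(x)),
     * i.e. the relation above with pmf(x) = -logPmfSum(x). */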
1776 
1777     /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1778     double expSumPmf = 0;
1779     double expSumF   = 0;
1780     for (const PointState& pointState : points_)
1781     {
1782         if (pointState.inTargetRegion())
1783         {
1784             expSumPmf += std::exp(pointState.logPmfSum());
1785             expSumF += std::exp(-pointState.freeEnergy());
1786         }
1787     }
1788     double numSamples = histogramSize_.histogramSize() / numSharingSims;
1789 
1790     /* Renormalize */
1791     double logRenorm = std::log(numSamples * expSumF / expSumPmf);
1792     for (PointState& pointState : points_)
1793     {
1794         if (pointState.inTargetRegion())
1795         {
1796             pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1797         }
1798     }
1799 }
1800 
1801 void BiasState::initGridPointState(const AwhBiasParams&          awhBiasParams,
1802                                    const std::vector<DimParams>& dimParams,
1803                                    const BiasGrid&               grid,
1804                                    const BiasParams&             params,
1805                                    const std::string&            filename,
1806                                    int                           numBias)
1807 {
1808     /* Modify the PMF, the free energy and the constant target distribution factor
1809      * to the user-input values, if user data is provided.
1810      */
1811     if (awhBiasParams.bUserData)
1812     {
1813         readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1814         setFreeEnergyToConvolvedPmf(dimParams, grid);
1815     }
1816 
1817     /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weight histogram. */
1818     GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN || points_[0].weightSumRef() != 0,
1819                        "AWH reference weight histogram not initialized properly with local "
1820                        "Boltzmann target distribution.");
1821 
1822     updateTargetDistribution(points_, params);
1823 
1824     for (PointState& pointState : points_)
1825     {
1826         if (pointState.inTargetRegion())
1827         {
1828             pointState.updateBias();
1829         }
1830         else
1831         {
1832             /* Note that for a zero target weight this sets a value that represents -infinity and must not be used for biasing. */
1833             pointState.setTargetToZero();
1834         }
1835     }
1836 
1837     /* Set the initial reference weight histogram. */
1838     const double histogramSize = histogramSize_.histogramSize();
1839     for (auto& pointState : points_)
1840     {
1841         pointState.setInitialReferenceWeightHistogram(histogramSize);
1842     }
1843 
1844     /* Make sure the pmf is normalized consistently with the histogram size.
1845        Note: the target distribution and free energy need to have been set before this call. */
1846     normalizePmf(params.numSharedUpdate);
1847 }
1848 
1849 BiasState::BiasState(const AwhBiasParams&          awhBiasParams,
1850                      double                        histogramSizeInitial,
1851                      const std::vector<DimParams>& dimParams,
1852                      const BiasGrid&               grid) :
1853     coordState_(awhBiasParams, dimParams, grid),
1854     points_(grid.numPoints()),
1855     weightSumCovering_(grid.numPoints()),
1856     histogramSize_(awhBiasParams, histogramSizeInitial)
1857 {
1858     /* The minimum and maximum multidimensional point indices that are affected by the next update */
1859     for (size_t d = 0; d < dimParams.size(); d++)
1860     {
1861         int index            = grid.point(coordState_.gridpointIndex()).index[d];
1862         originUpdatelist_[d] = index;
1863         endUpdatelist_[d]    = index;
1864     }
1865 }
1866 
1867 } // namespace gmx
1868