1 /*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2015,2016,2017,2018,2019, The GROMACS development team.
5 * Copyright (c) 2020,2021, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
9 *
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
14 *
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
32 *
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
35 */
36
37 /*! \internal \file
38 * \brief
39 * Implements the BiasState class.
40 *
41 * \author Viveca Lindahl
42 * \author Berk Hess <hess@kth.se>
43 * \ingroup module_awh
44 */
45
46 #include "gmxpre.h"
47
48 #include "biasstate.h"
49
50 #include <cassert>
51 #include <cmath>
52 #include <cstdio>
53 #include <cstdlib>
54 #include <cstring>
55
56 #include <algorithm>
57 #include <optional>
58
59 #include "gromacs/fileio/gmxfio.h"
60 #include "gromacs/fileio/xvgr.h"
61 #include "gromacs/gmxlib/network.h"
62 #include "gromacs/math/utilities.h"
63 #include "gromacs/mdrunutility/multisim.h"
64 #include "gromacs/mdtypes/awh_history.h"
65 #include "gromacs/mdtypes/awh_params.h"
66 #include "gromacs/mdtypes/commrec.h"
67 #include "gromacs/simd/simd.h"
68 #include "gromacs/simd/simd_math.h"
69 #include "gromacs/utility/arrayref.h"
70 #include "gromacs/utility/exceptions.h"
71 #include "gromacs/utility/gmxassert.h"
72 #include "gromacs/utility/smalloc.h"
73 #include "gromacs/utility/stringutil.h"
74
75 #include "biasgrid.h"
76 #include "pointstate.h"
77
78 namespace gmx
79 {
80
getPmf(gmx::ArrayRef<float> pmf) const81 void BiasState::getPmf(gmx::ArrayRef<float> pmf) const
82 {
83 GMX_ASSERT(pmf.size() == points_.size(), "pmf should have the size of the bias grid");
84
85 /* The PMF is just the negative of the log of the sampled PMF histogram.
86 * Points with zero target weight are ignored, they will mostly contain noise.
87 */
88 for (size_t i = 0; i < points_.size(); i++)
89 {
90 pmf[i] = points_[i].inTargetRegion() ? -points_[i].logPmfSum() : GMX_FLOAT_MAX;
91 }
92 }
93
94 namespace
95 {
96
97 /*! \brief
98 * Sum an array over all simulations on the master rank of each simulation.
99 *
100 * \param[in,out] arrayRef The data to sum.
101 * \param[in] multiSimComm Struct for multi-simulation communication.
102 */
sumOverSimulations(gmx::ArrayRef<int> arrayRef,const gmx_multisim_t * multiSimComm)103 void sumOverSimulations(gmx::ArrayRef<int> arrayRef, const gmx_multisim_t* multiSimComm)
104 {
105 gmx_sumi_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
106 }
107
108 /*! \brief
109 * Sum an array over all simulations on the master rank of each simulation.
110 *
111 * \param[in,out] arrayRef The data to sum.
112 * \param[in] multiSimComm Struct for multi-simulation communication.
113 */
sumOverSimulations(gmx::ArrayRef<double> arrayRef,const gmx_multisim_t * multiSimComm)114 void sumOverSimulations(gmx::ArrayRef<double> arrayRef, const gmx_multisim_t* multiSimComm)
115 {
116 gmx_sumd_sim(arrayRef.size(), arrayRef.data(), multiSimComm);
117 }
118
119 /*! \brief
120 * Sum an array over all simulations on all ranks of each simulation.
121 *
122 * This assumes the data is identical on all ranks within each simulation.
123 *
124 * \param[in,out] arrayRef The data to sum.
125 * \param[in] commRecord Struct for intra-simulation communication.
126 * \param[in] multiSimComm Struct for multi-simulation communication.
127 */
128 template<typename T>
sumOverSimulations(gmx::ArrayRef<T> arrayRef,const t_commrec * commRecord,const gmx_multisim_t * multiSimComm)129 void sumOverSimulations(gmx::ArrayRef<T> arrayRef, const t_commrec* commRecord, const gmx_multisim_t* multiSimComm)
130 {
131 if (MASTER(commRecord))
132 {
133 sumOverSimulations(arrayRef, multiSimComm);
134 }
135 if (commRecord->nnodes > 1)
136 {
137 gmx_bcast(arrayRef.size() * sizeof(T), arrayRef.data(), commRecord->mpi_comm_mygroup);
138 }
139 }
140
141 /*! \brief
142 * Sum PMF over multiple simulations, when requested.
143 *
144 * \param[in,out] pointState The state of the points in the bias.
145 * \param[in] numSharedUpdate The number of biases sharing the histogram.
146 * \param[in] commRecord Struct for intra-simulation communication.
147 * \param[in] multiSimComm Struct for multi-simulation communication.
148 */
sumPmf(gmx::ArrayRef<PointState> pointState,int numSharedUpdate,const t_commrec * commRecord,const gmx_multisim_t * multiSimComm)149 void sumPmf(gmx::ArrayRef<PointState> pointState,
150 int numSharedUpdate,
151 const t_commrec* commRecord,
152 const gmx_multisim_t* multiSimComm)
153 {
154 if (numSharedUpdate == 1)
155 {
156 return;
157 }
158 GMX_ASSERT(multiSimComm != nullptr && numSharedUpdate % multiSimComm->numSimulations_ == 0,
159 "numSharedUpdate should be a multiple of multiSimComm->numSimulations_");
160 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
161 "Sharing within a simulation is not implemented (yet)");
162
163 std::vector<double> buffer(pointState.size());
164
165 /* Need to temporarily exponentiate the log weights to sum over simulations */
166 for (size_t i = 0; i < buffer.size(); i++)
167 {
168 buffer[i] = pointState[i].inTargetRegion() ? std::exp(pointState[i].logPmfSum()) : 0;
169 }
170
171 sumOverSimulations(gmx::ArrayRef<double>(buffer), commRecord, multiSimComm);
172
173 /* Take log again to get (non-normalized) PMF */
174 double normFac = 1.0 / numSharedUpdate;
175 for (gmx::index i = 0; i < pointState.ssize(); i++)
176 {
177 if (pointState[i].inTargetRegion())
178 {
179 pointState[i].setLogPmfSum(std::log(buffer[i] * normFac));
180 }
181 }
182 }
183
184 /*! \brief
185 * Find the minimum free energy value.
186 *
187 * \param[in] pointState The state of the points.
188 * \returns the minimum free energy value.
189 */
freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)190 double freeEnergyMinimumValue(gmx::ArrayRef<const PointState> pointState)
191 {
192 double fMin = GMX_FLOAT_MAX;
193
194 for (auto const& ps : pointState)
195 {
196 if (ps.inTargetRegion() && ps.freeEnergy() < fMin)
197 {
198 fMin = ps.freeEnergy();
199 }
200 }
201
202 return fMin;
203 }
204
205 /*! \brief
206 * Find and return the log of the probability weight of a point given a coordinate value.
207 *
208 * The unnormalized weight is given by
209 * w(point|value) = exp(bias(point) - U(value,point)),
210 * where U is a harmonic umbrella potential.
211 *
212 * \param[in] dimParams The bias dimensions parameters
213 * \param[in] points The point state.
214 * \param[in] grid The grid.
215 * \param[in] pointIndex Point to evaluate probability weight for.
216 * \param[in] pointBias Bias for the point (as a log weight).
217 * \param[in] value Coordinate value.
218 * \param[in] neighborLambdaEnergies The energy of the system in neighboring lambdas states. Can be
219 * empty when there are no free energy lambda state dimensions.
220 * \param[in] gridpointIndex The index of the current grid point.
221 * \returns the log of the biased probability weight.
222 */
biasedLogWeightFromPoint(const std::vector<DimParams> & dimParams,const std::vector<PointState> & points,const BiasGrid & grid,int pointIndex,double pointBias,const awh_dvec value,gmx::ArrayRef<const double> neighborLambdaEnergies,int gridpointIndex)223 double biasedLogWeightFromPoint(const std::vector<DimParams>& dimParams,
224 const std::vector<PointState>& points,
225 const BiasGrid& grid,
226 int pointIndex,
227 double pointBias,
228 const awh_dvec value,
229 gmx::ArrayRef<const double> neighborLambdaEnergies,
230 int gridpointIndex)
231 {
232 double logWeight = detail::c_largeNegativeExponent;
233
234 /* Only points in the target region have non-zero weight */
235 if (points[pointIndex].inTargetRegion())
236 {
237 logWeight = pointBias;
238
239 /* Add potential for all parameter dimensions */
240 for (size_t d = 0; d < dimParams.size(); d++)
241 {
242 if (dimParams[d].isFepLambdaDimension())
243 {
244 /* If this is not a sampling step or if this function is called from
245 * calcConvolvedBias(), when writing energy subblocks, neighborLambdaEnergies will
246 * be empty. No convolution is required along the lambda dimension. */
247 if (!neighborLambdaEnergies.empty())
248 {
249 const int pointLambdaIndex = grid.point(pointIndex).coordValue[d];
250 const int gridpointLambdaIndex = grid.point(gridpointIndex).coordValue[d];
251 logWeight -= dimParams[d].fepDimParams().beta
252 * (neighborLambdaEnergies[pointLambdaIndex]
253 - neighborLambdaEnergies[gridpointLambdaIndex]);
254 }
255 }
256 else
257 {
258 double dev = getDeviationFromPointAlongGridAxis(grid, d, pointIndex, value[d]);
259 logWeight -= 0.5 * dimParams[d].pullDimParams().betak * dev * dev;
260 }
261 }
262 }
263 return logWeight;
264 }
265
266 /*! \brief
267 * Calculates the marginal distribution (marginal probability) for each value along
268 * a free energy lambda axis.
269 * The marginal distribution of one coordinate dimension value is the sum of the probability
270 * distribution of all values (herein all neighbor values) with the same value in the dimension
271 * of interest.
272 * \param[in] grid The bias grid.
273 * \param[in] neighbors The points to use for the calculation of the marginal distribution.
274 * \param[in] probWeightNeighbor Probability weights of the neighbors.
275 * \returns The calculated marginal distribution in a 1D array with
276 * as many elements as there are points along the axis of interest.
277 */
calculateFELambdaMarginalDistribution(const BiasGrid & grid,gmx::ArrayRef<const int> neighbors,gmx::ArrayRef<const double> probWeightNeighbor)278 std::vector<double> calculateFELambdaMarginalDistribution(const BiasGrid& grid,
279 gmx::ArrayRef<const int> neighbors,
280 gmx::ArrayRef<const double> probWeightNeighbor)
281 {
282 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
283 GMX_RELEASE_ASSERT(lambdaAxisIndex,
284 "There must be a free energy lambda axis in order to calculate the free "
285 "energy lambda marginal distribution.");
286 const int numFepLambdaStates = grid.numFepLambdaStates();
287 std::vector<double> lambdaMarginalDistribution(numFepLambdaStates, 0);
288
289 for (size_t i = 0; i < neighbors.size(); i++)
290 {
291 const int neighbor = neighbors[i];
292 const int lambdaState = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
293 lambdaMarginalDistribution[lambdaState] += probWeightNeighbor[i];
294 }
295 return lambdaMarginalDistribution;
296 }
297
298 } // namespace
299
calcConvolvedPmf(const std::vector<DimParams> & dimParams,const BiasGrid & grid,std::vector<float> * convolvedPmf) const300 void BiasState::calcConvolvedPmf(const std::vector<DimParams>& dimParams,
301 const BiasGrid& grid,
302 std::vector<float>* convolvedPmf) const
303 {
304 size_t numPoints = grid.numPoints();
305
306 convolvedPmf->resize(numPoints);
307
308 /* Get the PMF to convolve. */
309 std::vector<float> pmf(numPoints);
310 getPmf(pmf);
311
312 for (size_t m = 0; m < numPoints; m++)
313 {
314 double freeEnergyWeights = 0;
315 const GridPoint& point = grid.point(m);
316 for (auto& neighbor : point.neighbor)
317 {
318 /* Do not convolve the bias along a lambda axis - only use the pmf from the current point */
319 if (!pointsHaveDifferentLambda(grid, m, neighbor))
320 {
321 /* The negative PMF is a positive bias. */
322 double biasNeighbor = -pmf[neighbor];
323
324 /* Add the convolved PMF weights for the neighbors of this point.
325 Note that this function only adds point within the target > 0 region.
326 Sum weights, take the logarithm last to get the free energy. */
327 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
328 biasNeighbor, point.coordValue, {}, m);
329 freeEnergyWeights += std::exp(logWeight);
330 }
331 }
332
333 GMX_RELEASE_ASSERT(freeEnergyWeights > 0,
334 "Attempting to do log(<= 0) in AWH convolved PMF calculation.");
335 (*convolvedPmf)[m] = -std::log(static_cast<float>(freeEnergyWeights));
336 }
337 }
338
339 namespace
340 {
341
342 /*! \brief
343 * Updates the target distribution for all points.
344 *
345 * The target distribution is always updated for all points
346 * at the same time.
347 *
348 * \param[in,out] pointState The state of all points.
349 * \param[in] params The bias parameters.
350 */
updateTargetDistribution(gmx::ArrayRef<PointState> pointState,const BiasParams & params)351 void updateTargetDistribution(gmx::ArrayRef<PointState> pointState, const BiasParams& params)
352 {
353 double freeEnergyCutoff = 0;
354 if (params.eTarget == eawhtargetCUTOFF)
355 {
356 freeEnergyCutoff = freeEnergyMinimumValue(pointState) + params.freeEnergyCutoffInKT;
357 }
358
359 double sumTarget = 0;
360 for (PointState& ps : pointState)
361 {
362 sumTarget += ps.updateTargetWeight(params, freeEnergyCutoff);
363 }
364 GMX_RELEASE_ASSERT(sumTarget > 0, "We should have a non-zero distribution");
365
366 /* Normalize to 1 */
367 double invSum = 1.0 / sumTarget;
368 for (PointState& ps : pointState)
369 {
370 ps.scaleTarget(invSum);
371 }
372 }
373
374 /*! \brief
375 * Puts together a string describing a grid point.
376 *
377 * \param[in] grid The grid.
378 * \param[in] point BiasGrid point index.
379 * \returns a string for the point.
380 */
gridPointValueString(const BiasGrid & grid,int point)381 std::string gridPointValueString(const BiasGrid& grid, int point)
382 {
383 std::string pointString;
384
385 pointString += "(";
386
387 for (int d = 0; d < grid.numDimensions(); d++)
388 {
389 pointString += gmx::formatString("%g", grid.point(point).coordValue[d]);
390 if (d < grid.numDimensions() - 1)
391 {
392 pointString += ",";
393 }
394 else
395 {
396 pointString += ")";
397 }
398 }
399
400 return pointString;
401 }
402
403 } // namespace
404
warnForHistogramAnomalies(const BiasGrid & grid,int biasIndex,double t,FILE * fplog,int maxNumWarnings) const405 int BiasState::warnForHistogramAnomalies(const BiasGrid& grid, int biasIndex, double t, FILE* fplog, int maxNumWarnings) const
406 {
407 GMX_ASSERT(fplog != nullptr, "Warnings can only be issued if there is log file.");
408 const double maxHistogramRatio = 0.5; /* Tolerance for printing a warning about the histogram ratios */
409
410 /* Sum up the histograms and get their normalization */
411 double sumVisits = 0;
412 double sumWeights = 0;
413 for (auto& pointState : points_)
414 {
415 if (pointState.inTargetRegion())
416 {
417 sumVisits += pointState.numVisitsTot();
418 sumWeights += pointState.weightSumTot();
419 }
420 }
421 GMX_RELEASE_ASSERT(sumVisits > 0, "We should have visits");
422 GMX_RELEASE_ASSERT(sumWeights > 0, "We should have weight");
423 double invNormVisits = 1.0 / sumVisits;
424 double invNormWeight = 1.0 / sumWeights;
425
426 /* Check all points for warnings */
427 int numWarnings = 0;
428 size_t numPoints = grid.numPoints();
429 for (size_t m = 0; m < numPoints; m++)
430 {
431 /* Skip points close to boundary or non-target region */
432 const GridPoint& gridPoint = grid.point(m);
433 bool skipPoint = false;
434 for (size_t n = 0; (n < gridPoint.neighbor.size()) && !skipPoint; n++)
435 {
436 int neighbor = gridPoint.neighbor[n];
437 skipPoint = !points_[neighbor].inTargetRegion();
438 for (int d = 0; (d < grid.numDimensions()) && !skipPoint; d++)
439 {
440 const GridPoint& neighborPoint = grid.point(neighbor);
441 skipPoint = neighborPoint.index[d] == 0
442 || neighborPoint.index[d] == grid.axis(d).numPoints() - 1;
443 }
444 }
445
446 /* Warn if the coordinate distribution is less than the target distribution with a certain fraction somewhere */
447 const double relativeWeight = points_[m].weightSumTot() * invNormWeight;
448 const double relativeVisits = points_[m].numVisitsTot() * invNormVisits;
449 if (!skipPoint && relativeVisits < relativeWeight * maxHistogramRatio)
450 {
451 std::string pointValueString = gridPointValueString(grid, m);
452 std::string warningMessage = gmx::formatString(
453 "\nawh%d warning: "
454 "at t = %g ps the obtained coordinate distribution at coordinate value %s "
455 "is less than a fraction %g of the reference distribution at that point. "
456 "If you are not certain about your settings you might want to increase your "
457 "pull force constant or "
458 "modify your sampling region.\n",
459 biasIndex + 1, t, pointValueString.c_str(), maxHistogramRatio);
460 gmx::TextLineWrapper wrapper;
461 wrapper.settings().setLineLength(c_linewidth);
462 fprintf(fplog, "%s", wrapper.wrapToString(warningMessage).c_str());
463
464 numWarnings++;
465 }
466 if (numWarnings >= maxNumWarnings)
467 {
468 break;
469 }
470 }
471
472 return numWarnings;
473 }
474
calcUmbrellaForceAndPotential(const std::vector<DimParams> & dimParams,const BiasGrid & grid,int point,ArrayRef<const double> neighborLambdaDhdl,gmx::ArrayRef<double> force) const475 double BiasState::calcUmbrellaForceAndPotential(const std::vector<DimParams>& dimParams,
476 const BiasGrid& grid,
477 int point,
478 ArrayRef<const double> neighborLambdaDhdl,
479 gmx::ArrayRef<double> force) const
480 {
481 double potential = 0;
482 for (size_t d = 0; d < dimParams.size(); d++)
483 {
484 if (dimParams[d].isFepLambdaDimension())
485 {
486 if (!neighborLambdaDhdl.empty())
487 {
488 const int coordpointLambdaIndex = grid.point(point).coordValue[d];
489 force[d] = neighborLambdaDhdl[coordpointLambdaIndex];
490 /* The potential should not be affected by the lambda dimension. */
491 }
492 }
493 else
494 {
495 double deviation =
496 getDeviationFromPointAlongGridAxis(grid, d, point, coordState_.coordValue()[d]);
497 double k = dimParams[d].pullDimParams().k;
498
499 /* Force from harmonic potential 0.5*k*dev^2 */
500 force[d] = -k * deviation;
501 potential += 0.5 * k * deviation * deviation;
502 }
503 }
504
505 return potential;
506 }
507
calcConvolvedForce(const std::vector<DimParams> & dimParams,const BiasGrid & grid,gmx::ArrayRef<const double> probWeightNeighbor,ArrayRef<const double> neighborLambdaDhdl,gmx::ArrayRef<double> forceWorkBuffer,gmx::ArrayRef<double> force) const508 void BiasState::calcConvolvedForce(const std::vector<DimParams>& dimParams,
509 const BiasGrid& grid,
510 gmx::ArrayRef<const double> probWeightNeighbor,
511 ArrayRef<const double> neighborLambdaDhdl,
512 gmx::ArrayRef<double> forceWorkBuffer,
513 gmx::ArrayRef<double> force) const
514 {
515 for (size_t d = 0; d < dimParams.size(); d++)
516 {
517 force[d] = 0;
518 }
519
520 /* Only neighboring points have non-negligible contribution. */
521 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
522 gmx::ArrayRef<double> forceFromNeighbor = forceWorkBuffer;
523 for (size_t n = 0; n < neighbor.size(); n++)
524 {
525 double weightNeighbor = probWeightNeighbor[n];
526 int indexNeighbor = neighbor[n];
527
528 /* Get the umbrella force from this point. The returned potential is ignored here. */
529 calcUmbrellaForceAndPotential(dimParams, grid, indexNeighbor, neighborLambdaDhdl, forceFromNeighbor);
530
531 /* Add the weighted umbrella force to the convolved force. */
532 for (size_t d = 0; d < dimParams.size(); d++)
533 {
534 force[d] += forceFromNeighbor[d] * weightNeighbor;
535 }
536 }
537 }
538
moveUmbrella(const std::vector<DimParams> & dimParams,const BiasGrid & grid,gmx::ArrayRef<const double> probWeightNeighbor,ArrayRef<const double> neighborLambdaDhdl,gmx::ArrayRef<double> biasForce,int64_t step,int64_t seed,int indexSeed)539 double BiasState::moveUmbrella(const std::vector<DimParams>& dimParams,
540 const BiasGrid& grid,
541 gmx::ArrayRef<const double> probWeightNeighbor,
542 ArrayRef<const double> neighborLambdaDhdl,
543 gmx::ArrayRef<double> biasForce,
544 int64_t step,
545 int64_t seed,
546 int indexSeed)
547 {
548 /* Generate and set a new coordinate reference value */
549 coordState_.sampleUmbrellaGridpoint(grid, coordState_.gridpointIndex(), probWeightNeighbor,
550 step, seed, indexSeed);
551
552 std::vector<double> newForce(dimParams.size());
553 double newPotential = calcUmbrellaForceAndPotential(
554 dimParams, grid, coordState_.umbrellaGridpoint(), neighborLambdaDhdl, newForce);
555
556 /* A modification of the reference value at time t will lead to a different
557 force over t-dt/2 to t and over t to t+dt/2. For high switching rates
558 this means the force and velocity will change signs roughly as often.
559 To avoid any issues we take the average of the previous and new force
560 at steps when the reference value has been moved. E.g. if the ref. value
561 is set every step to (coord dvalue +/- delta) would give zero force.
562 */
563 for (gmx::index d = 0; d < biasForce.ssize(); d++)
564 {
565 /* Average of the current and new force */
566 biasForce[d] = 0.5 * (biasForce[d] + newForce[d]);
567 }
568
569 return newPotential;
570 }
571
572 namespace
573 {
574
575 /*! \brief
576 * Sets the histogram rescaling factors needed to control the histogram size.
577 *
578 * For sake of robustness, the reference weight histogram can grow at a rate
579 * different from the actual sampling rate. Typically this happens for a limited
580 * initial time, alternatively growth is scaled down by a constant factor for all
581 * times. Since the size of the reference histogram sets the size of the free
582 * energy update this should be reflected also in the PMF. Thus the PMF histogram
583 * needs to be rescaled too.
584 *
585 * This function should only be called by the bias update function or wrapped by a function that
586 * knows what scale factors should be applied when, e.g,
587 * getSkippedUpdateHistogramScaleFactors().
588 *
589 * \param[in] params The bias parameters.
590 * \param[in] newHistogramSize New reference weight histogram size.
591 * \param[in] oldHistogramSize Previous reference weight histogram size (before adding new samples).
592 * \param[out] weightHistScaling Scaling factor for the reference weight histogram.
593 * \param[out] logPmfSumScaling Log of the scaling factor for the PMF histogram.
594 */
setHistogramUpdateScaleFactors(const BiasParams & params,double newHistogramSize,double oldHistogramSize,double * weightHistScaling,double * logPmfSumScaling)595 void setHistogramUpdateScaleFactors(const BiasParams& params,
596 double newHistogramSize,
597 double oldHistogramSize,
598 double* weightHistScaling,
599 double* logPmfSumScaling)
600 {
601
602 /* The two scaling factors below are slightly different (ignoring the log factor) because the
603 reference and the PMF histogram apply weight scaling differently. The weight histogram
604 applies is locally, i.e. each sample is scaled down meaning all samples get equal weight.
605 It is done this way because that is what target type local Boltzmann (for which
606 target = weight histogram) needs. In contrast, the PMF histogram is rescaled globally
607 by repeatedly scaling down the whole histogram. The reasons for doing it this way are:
608 1) empirically this is necessary for converging the PMF; 2) since the extraction of
609 the PMF is theoretically only valid for a constant bias, new samples should get more
610 weight than old ones for which the bias is fluctuating more. */
611 *weightHistScaling =
612 newHistogramSize / (oldHistogramSize + params.updateWeight * params.localWeightScaling);
613 *logPmfSumScaling = std::log(newHistogramSize / (oldHistogramSize + params.updateWeight));
614 }
615
616 } // namespace
617
getSkippedUpdateHistogramScaleFactors(const BiasParams & params,double * weightHistScaling,double * logPmfSumScaling) const618 void BiasState::getSkippedUpdateHistogramScaleFactors(const BiasParams& params,
619 double* weightHistScaling,
620 double* logPmfSumScaling) const
621 {
622 GMX_ASSERT(params.skipUpdates(),
623 "Calling function for skipped updates when skipping updates is not allowed");
624
625 if (inInitialStage())
626 {
627 /* In between global updates the reference histogram size is kept constant so we trivially
628 know what the histogram size was at the time of the skipped update. */
629 double histogramSize = histogramSize_.histogramSize();
630 setHistogramUpdateScaleFactors(params, histogramSize, histogramSize, weightHistScaling,
631 logPmfSumScaling);
632 }
633 else
634 {
635 /* In the final stage, the reference histogram grows at the sampling rate which gives trivial scale factors. */
636 *weightHistScaling = 1;
637 *logPmfSumScaling = 0;
638 }
639 }
640
doSkippedUpdatesForAllPoints(const BiasParams & params)641 void BiasState::doSkippedUpdatesForAllPoints(const BiasParams& params)
642 {
643 double weightHistScaling;
644 double logPmfsumScaling;
645
646 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
647
648 for (auto& pointState : points_)
649 {
650 bool didUpdate = pointState.performPreviouslySkippedUpdates(
651 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
652
653 /* Update the bias for this point only if there were skipped updates in the past to avoid calculating the log unneccessarily */
654 if (didUpdate)
655 {
656 pointState.updateBias();
657 }
658 }
659 }
660
doSkippedUpdatesInNeighborhood(const BiasParams & params,const BiasGrid & grid)661 void BiasState::doSkippedUpdatesInNeighborhood(const BiasParams& params, const BiasGrid& grid)
662 {
663 double weightHistScaling;
664 double logPmfsumScaling;
665
666 getSkippedUpdateHistogramScaleFactors(params, &weightHistScaling, &logPmfsumScaling);
667
668 /* For each neighbor point of the center point, refresh its state by adding the results of all past, skipped updates. */
669 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
670 for (auto& neighbor : neighbors)
671 {
672 bool didUpdate = points_[neighbor].performPreviouslySkippedUpdates(
673 params, histogramSize_.numUpdates(), weightHistScaling, logPmfsumScaling);
674
675 if (didUpdate)
676 {
677 points_[neighbor].updateBias();
678 }
679 }
680 }
681
682 namespace
683 {
684
685 /*! \brief
686 * Merge update lists from multiple sharing simulations.
687 *
688 * \param[in,out] updateList Update list for this simulation (assumed >= npoints long).
689 * \param[in] numPoints Total number of points.
690 * \param[in] commRecord Struct for intra-simulation communication.
691 * \param[in] multiSimComm Struct for multi-simulation communication.
692 */
mergeSharedUpdateLists(std::vector<int> * updateList,int numPoints,const t_commrec * commRecord,const gmx_multisim_t * multiSimComm)693 void mergeSharedUpdateLists(std::vector<int>* updateList,
694 int numPoints,
695 const t_commrec* commRecord,
696 const gmx_multisim_t* multiSimComm)
697 {
698 std::vector<int> numUpdatesOfPoint;
699
700 /* Flag the update points of this sim.
701 TODO: we can probably avoid allocating this array and just use the input array. */
702 numUpdatesOfPoint.resize(numPoints, 0);
703 for (auto& pointIndex : *updateList)
704 {
705 numUpdatesOfPoint[pointIndex] = 1;
706 }
707
708 /* Sum over the sims to get all the flagged points */
709 sumOverSimulations(arrayRefFromArray(numUpdatesOfPoint.data(), numPoints), commRecord, multiSimComm);
710
711 /* Collect the indices of the flagged points in place. The resulting array will be the merged update list.*/
712 updateList->clear();
713 for (int m = 0; m < numPoints; m++)
714 {
715 if (numUpdatesOfPoint[m] > 0)
716 {
717 updateList->push_back(m);
718 }
719 }
720 }
721
722 /*! \brief
723 * Generate an update list of points sampled since the last update.
724 *
725 * \param[in] grid The AWH bias.
726 * \param[in] points The point state.
727 * \param[in] originUpdatelist The origin of the rectangular region that has been sampled since
728 * last update. \param[in] endUpdatelist The end of the rectangular that has been sampled since
729 * last update. \param[in,out] updateList Local update list to set (assumed >= npoints long).
730 */
makeLocalUpdateList(const BiasGrid & grid,const std::vector<PointState> & points,const awh_ivec originUpdatelist,const awh_ivec endUpdatelist,std::vector<int> * updateList)731 void makeLocalUpdateList(const BiasGrid& grid,
732 const std::vector<PointState>& points,
733 const awh_ivec originUpdatelist,
734 const awh_ivec endUpdatelist,
735 std::vector<int>* updateList)
736 {
737 awh_ivec origin;
738 awh_ivec numPoints;
739
740 /* Define the update search grid */
741 for (int d = 0; d < grid.numDimensions(); d++)
742 {
743 origin[d] = originUpdatelist[d];
744 numPoints[d] = endUpdatelist[d] - originUpdatelist[d] + 1;
745
746 /* Because the end_updatelist is unwrapped it can be > (npoints - 1) so that numPoints can be > npoints in grid.
747 This helps for calculating the distance/number of points but should be removed and fixed when the way of
748 updating origin/end updatelist is changed (see sampleProbabilityWeights). */
749 numPoints[d] = std::min(grid.axis(d).numPoints(), numPoints[d]);
750 }
751
752 /* Make the update list */
753 updateList->clear();
754 int pointIndex = -1;
755 bool pointExists = true;
756 while (pointExists)
757 {
758 pointExists = advancePointInSubgrid(grid, origin, numPoints, &pointIndex);
759
760 if (pointExists && points[pointIndex].inTargetRegion())
761 {
762 updateList->push_back(pointIndex);
763 }
764 }
765 }
766
767 } // namespace
768
resetLocalUpdateRange(const BiasGrid & grid)769 void BiasState::resetLocalUpdateRange(const BiasGrid& grid)
770 {
771 const int gridpointIndex = coordState_.gridpointIndex();
772 for (int d = 0; d < grid.numDimensions(); d++)
773 {
774 /* This gives the minimum range consisting only of the current closest point. */
775 originUpdatelist_[d] = grid.point(gridpointIndex).index[d];
776 endUpdatelist_[d] = grid.point(gridpointIndex).index[d];
777 }
778 }
779
780 namespace
781 {
782
783 /*! \brief
784 * Add partial histograms (accumulating between updates) to accumulating histograms.
785 *
786 * \param[in,out] pointState The state of the points in the bias.
787 * \param[in,out] weightSumCovering The weights for checking covering.
788 * \param[in] numSharedUpdate The number of biases sharing the histrogram.
789 * \param[in] commRecord Struct for intra-simulation communication.
790 * \param[in] multiSimComm Struct for multi-simulation communication.
791 * \param[in] localUpdateList List of points with data.
792 */
sumHistograms(gmx::ArrayRef<PointState> pointState,gmx::ArrayRef<double> weightSumCovering,int numSharedUpdate,const t_commrec * commRecord,const gmx_multisim_t * multiSimComm,const std::vector<int> & localUpdateList)793 void sumHistograms(gmx::ArrayRef<PointState> pointState,
794 gmx::ArrayRef<double> weightSumCovering,
795 int numSharedUpdate,
796 const t_commrec* commRecord,
797 const gmx_multisim_t* multiSimComm,
798 const std::vector<int>& localUpdateList)
799 {
800 /* The covering checking histograms are added before summing over simulations, so that the
801 weights from different simulations are kept distinguishable. */
802 for (int globalIndex : localUpdateList)
803 {
804 weightSumCovering[globalIndex] += pointState[globalIndex].weightSumIteration();
805 }
806
807 /* Sum histograms over multiple simulations if needed. */
808 if (numSharedUpdate > 1)
809 {
810 GMX_ASSERT(numSharedUpdate == multiSimComm->numSimulations_,
811 "Sharing within a simulation is not implemented (yet)");
812
813 /* Collect the weights and counts in linear arrays to be able to use gmx_sumd_sim. */
814 std::vector<double> weightSum;
815 std::vector<double> coordVisits;
816
817 weightSum.resize(localUpdateList.size());
818 coordVisits.resize(localUpdateList.size());
819
820 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
821 {
822 const PointState& ps = pointState[localUpdateList[localIndex]];
823
824 weightSum[localIndex] = ps.weightSumIteration();
825 coordVisits[localIndex] = ps.numVisitsIteration();
826 }
827
828 sumOverSimulations(gmx::ArrayRef<double>(weightSum), commRecord, multiSimComm);
829 sumOverSimulations(gmx::ArrayRef<double>(coordVisits), commRecord, multiSimComm);
830
831 /* Transfer back the result */
832 for (size_t localIndex = 0; localIndex < localUpdateList.size(); localIndex++)
833 {
834 PointState& ps = pointState[localUpdateList[localIndex]];
835
836 ps.setPartialWeightAndCount(weightSum[localIndex], coordVisits[localIndex]);
837 }
838 }
839
840 /* Now add the partial counts and weights to the accumulating histograms.
841 Note: we still need to use the weights for the update so we wait
842 with resetting them until the end of the update. */
843 for (int globalIndex : localUpdateList)
844 {
845 pointState[globalIndex].addPartialWeightAndCount();
846 }
847 }
848
849 /*! \brief
850 * Label points along an axis as covered or not.
851 *
852 * A point is covered if it is surrounded by visited points up to a radius = coverRadius.
853 *
854 * \param[in] visited Visited? For each point.
855 * \param[in] checkCovering Check for covering? For each point.
856 * \param[in] numPoints The number of grid points along this dimension.
857 * \param[in] period Period in number of points.
858 * \param[in] coverRadius Cover radius, in points, needed for defining a point as covered.
859 * \param[in,out] covered In this array elements are 1 for covered points and 0 for
860 * non-covered points, this routine assumes that \p covered has at least size \p numPoints.
861 */
labelCoveredPoints(const std::vector<bool> & visited,const std::vector<bool> & checkCovering,int numPoints,int period,int coverRadius,gmx::ArrayRef<int> covered)862 void labelCoveredPoints(const std::vector<bool>& visited,
863 const std::vector<bool>& checkCovering,
864 int numPoints,
865 int period,
866 int coverRadius,
867 gmx::ArrayRef<int> covered)
868 {
869 GMX_ASSERT(covered.ssize() >= numPoints, "covered should be at least as large as the grid");
870
871 bool haveFirstNotVisited = false;
872 int firstNotVisited = -1;
873 int notVisitedLow = -1;
874 int notVisitedHigh = -1;
875
876 for (int n = 0; n < numPoints; n++)
877 {
878 if (checkCovering[n] && !visited[n])
879 {
880 if (!haveFirstNotVisited)
881 {
882 notVisitedLow = n;
883 firstNotVisited = n;
884 haveFirstNotVisited = true;
885 }
886 else
887 {
888 notVisitedHigh = n;
889
890 /* Have now an interval I = [notVisitedLow,notVisitedHigh] of visited points bounded
891 by unvisited points. The unvisted end points affect the coveredness of the
892 visited with a reach equal to the cover radius. */
893 int notCoveredLow = notVisitedLow + coverRadius;
894 int notCoveredHigh = notVisitedHigh - coverRadius;
895 for (int i = notVisitedLow; i <= notVisitedHigh; i++)
896 {
897 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
898 }
899
900 /* Find a new interval to set covering for. Make the notVisitedHigh of this interval
901 the notVisitedLow of the next. */
902 notVisitedLow = notVisitedHigh;
903 }
904 }
905 }
906
907 /* Have labelled all the internal points. Now take care of the boundary regions. */
908 if (!haveFirstNotVisited)
909 {
910 /* No non-visited points <=> all points visited => all points covered. */
911
912 for (int n = 0; n < numPoints; n++)
913 {
914 covered[n] = 1;
915 }
916 }
917 else
918 {
919 int lastNotVisited = notVisitedLow;
920
921 /* For periodic boundaries, non-visited points can influence points
922 on the other side of the boundary so we need to wrap around. */
923
924 /* Lower end. For periodic boundaries the last upper end not visited point becomes the low-end not visited point.
925 For non-periodic boundaries there is no lower end point so a dummy value is used. */
926 int notVisitedHigh = firstNotVisited;
927 int notVisitedLow = period > 0 ? (lastNotVisited - period) : -(coverRadius + 1);
928
929 int notCoveredLow = notVisitedLow + coverRadius;
930 int notCoveredHigh = notVisitedHigh - coverRadius;
931
932 for (int i = 0; i <= notVisitedHigh; i++)
933 {
934 /* For non-periodic boundaries notCoveredLow = -1 will impose no restriction. */
935 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
936 }
937
938 /* Upper end. Same as for lower end but in the other direction. */
939 notVisitedHigh = period > 0 ? (firstNotVisited + period) : (numPoints + coverRadius);
940 notVisitedLow = lastNotVisited;
941
942 notCoveredLow = notVisitedLow + coverRadius;
943 notCoveredHigh = notVisitedHigh - coverRadius;
944
945 for (int i = notVisitedLow; i <= numPoints - 1; i++)
946 {
947 /* For non-periodic boundaries notCoveredHigh = numPoints will impose no restriction. */
948 covered[i] = static_cast<int>((i > notCoveredLow) && (i < notCoveredHigh));
949 }
950 }
951 }
952
953 } // namespace
954
isSamplingRegionCovered(const BiasParams & params,const std::vector<DimParams> & dimParams,const BiasGrid & grid,const t_commrec * commRecord,const gmx_multisim_t * multiSimComm) const955 bool BiasState::isSamplingRegionCovered(const BiasParams& params,
956 const std::vector<DimParams>& dimParams,
957 const BiasGrid& grid,
958 const t_commrec* commRecord,
959 const gmx_multisim_t* multiSimComm) const
960 {
961 /* Allocate and initialize arrays: one for checking visits along each dimension,
962 one for keeping track of which points to check and one for the covered points.
963 Possibly these could be kept as AWH variables to avoid these allocations. */
964 struct CheckDim
965 {
966 std::vector<bool> visited;
967 std::vector<bool> checkCovering;
968 // We use int for the covering array since we might use gmx_sumi_sim.
969 std::vector<int> covered;
970 };
971
972 std::vector<CheckDim> checkDim;
973 checkDim.resize(grid.numDimensions());
974
975 for (int d = 0; d < grid.numDimensions(); d++)
976 {
977 const size_t numPoints = grid.axis(d).numPoints();
978 checkDim[d].visited.resize(numPoints, false);
979 checkDim[d].checkCovering.resize(numPoints, false);
980 checkDim[d].covered.resize(numPoints, 0);
981 }
982
983 /* Set visited points along each dimension and which points should be checked for covering.
984 Specifically, points above the free energy cutoff (if there is one) or points outside
985 of the target region are ignored. */
986
987 /* Set the free energy cutoff */
988 double maxFreeEnergy = GMX_FLOAT_MAX;
989
990 if (params.eTarget == eawhtargetCUTOFF)
991 {
992 maxFreeEnergy = freeEnergyMinimumValue(points_) + params.freeEnergyCutoffInKT;
993 }
994
995 /* Set the threshold weight for a point to be considered visited. */
996 double weightThreshold = 1;
997 for (int d = 0; d < grid.numDimensions(); d++)
998 {
999 if (grid.axis(d).isFepLambdaAxis())
1000 {
1001 /* Do not modify the weight threshold based on a FEP lambda axis. The spread
1002 * of the sampling weights is not depending on a Gaussian distribution (like
1003 * below). */
1004 weightThreshold *= 1.0;
1005 }
1006 else
1007 {
1008 /* The spacing is proportional to 1/sqrt(betak). The weight threshold will be
1009 * approximately (given that the spacing can be modified if the dimension is periodic)
1010 * proportional to sqrt(1/(2*pi)). */
1011 weightThreshold *= grid.axis(d).spacing()
1012 * std::sqrt(dimParams[d].pullDimParams().betak * 0.5 * M_1_PI);
1013 }
1014 }
1015
1016 /* Project the sampling weights onto each dimension */
1017 for (size_t m = 0; m < grid.numPoints(); m++)
1018 {
1019 const PointState& pointState = points_[m];
1020
1021 for (int d = 0; d < grid.numDimensions(); d++)
1022 {
1023 int n = grid.point(m).index[d];
1024
1025 /* Is visited if it was already visited or if there is enough weight at the current point */
1026 checkDim[d].visited[n] = checkDim[d].visited[n] || (weightSumCovering_[m] > weightThreshold);
1027
1028 /* Check for covering if there is at least point in this slice that is in the target region and within the cutoff */
1029 checkDim[d].checkCovering[n] =
1030 checkDim[d].checkCovering[n]
1031 || (pointState.inTargetRegion() && pointState.freeEnergy() < maxFreeEnergy);
1032 }
1033 }
1034
1035 /* Label each point along each dimension as covered or not. */
1036 for (int d = 0; d < grid.numDimensions(); d++)
1037 {
1038 labelCoveredPoints(checkDim[d].visited, checkDim[d].checkCovering, grid.axis(d).numPoints(),
1039 grid.axis(d).numPointsInPeriod(), params.coverRadius()[d], checkDim[d].covered);
1040 }
1041
1042 /* Now check for global covering. Each dimension needs to be covered separately.
1043 A dimension is covered if each point is covered. Multiple simulations collectively
1044 cover the points, i.e. a point is covered if any of the simulations covered it.
1045 However, visited points are not shared, i.e. if a point is covered or not is
1046 determined by the visits of a single simulation. In general the covering criterion is
1047 all points covered => all points are surrounded by visited points up to a radius = coverRadius.
1048 For 1 simulation, all points covered <=> all points visited. For multiple simulations
1049 however, all points visited collectively !=> all points covered, except for coverRadius = 0.
1050 In the limit of large coverRadius, all points covered => all points visited by at least one
1051 simulation (since no point will be covered until all points have been visited by a
1052 single simulation). Basically coverRadius sets how much "connectedness" (or mixing) a point
1053 needs with surrounding points before sharing covering information with other simulations. */
1054
1055 /* Communicate the covered points between sharing simulations if needed. */
1056 if (params.numSharedUpdate > 1)
1057 {
1058 /* For multiple dimensions this may not be the best way to do it. */
1059 for (int d = 0; d < grid.numDimensions(); d++)
1060 {
1061 sumOverSimulations(
1062 gmx::arrayRefFromArray(checkDim[d].covered.data(), grid.axis(d).numPoints()),
1063 commRecord, multiSimComm);
1064 }
1065 }
1066
1067 /* Now check if for each dimension all points are covered. Break if not true. */
1068 bool allPointsCovered = true;
1069 for (int d = 0; d < grid.numDimensions() && allPointsCovered; d++)
1070 {
1071 for (int n = 0; n < grid.axis(d).numPoints() && allPointsCovered; n++)
1072 {
1073 allPointsCovered = (checkDim[d].covered[n] != 0);
1074 }
1075 }
1076
1077 return allPointsCovered;
1078 }
1079
1080 /*! \brief
1081 * Normalizes the free energy and PMF sum.
1082 *
1083 * \param[in] pointState The state of the points.
1084 */
normalizeFreeEnergyAndPmfSum(std::vector<PointState> * pointState)1085 static void normalizeFreeEnergyAndPmfSum(std::vector<PointState>* pointState)
1086 {
1087 double minF = freeEnergyMinimumValue(*pointState);
1088
1089 for (PointState& ps : *pointState)
1090 {
1091 ps.normalizeFreeEnergyAndPmfSum(minF);
1092 }
1093 }
1094
updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams> & dimParams,const BiasGrid & grid,const BiasParams & params,const t_commrec * commRecord,const gmx_multisim_t * multiSimComm,double t,int64_t step,FILE * fplog,std::vector<int> * updateList)1095 void BiasState::updateFreeEnergyAndAddSamplesToHistogram(const std::vector<DimParams>& dimParams,
1096 const BiasGrid& grid,
1097 const BiasParams& params,
1098 const t_commrec* commRecord,
1099 const gmx_multisim_t* multiSimComm,
1100 double t,
1101 int64_t step,
1102 FILE* fplog,
1103 std::vector<int>* updateList)
1104 {
1105 /* Note hat updateList is only used in this scope and is always
1106 * re-initialized. We do not use a local vector, because that would
1107 * cause reallocation every time this funtion is called and the vector
1108 * can be the size of the whole grid.
1109 */
1110
1111 /* Make a list of all local points, i.e. those that could have been touched since
1112 the last update. These are the points needed for summing histograms below
1113 (non-local points only add zeros). For local updates, this will also be the
1114 final update list. */
1115 makeLocalUpdateList(grid, points_, originUpdatelist_, endUpdatelist_, updateList);
1116 if (params.numSharedUpdate > 1)
1117 {
1118 mergeSharedUpdateLists(updateList, points_.size(), commRecord, multiSimComm);
1119 }
1120
1121 /* Reset the range for the next update */
1122 resetLocalUpdateRange(grid);
1123
1124 /* Add samples to histograms for all local points and sync simulations if needed */
1125 sumHistograms(points_, weightSumCovering_, params.numSharedUpdate, commRecord, multiSimComm, *updateList);
1126
1127 sumPmf(points_, params.numSharedUpdate, commRecord, multiSimComm);
1128
1129 /* Renormalize the free energy if values are too large. */
1130 bool needToNormalizeFreeEnergy = false;
1131 for (int& globalIndex : *updateList)
1132 {
1133 /* We want to keep the absolute value of the free energies to be less
1134 c_largePositiveExponent to be able to safely pass these values to exp(). The check below
1135 ensures this as long as the free energy values grow less than 0.5*c_largePositiveExponent
1136 in a return time to this neighborhood. For reasonable update sizes it's unlikely that
1137 this requirement would be broken. */
1138 if (std::abs(points_[globalIndex].freeEnergy()) > 0.5 * detail::c_largePositiveExponent)
1139 {
1140 needToNormalizeFreeEnergy = true;
1141 break;
1142 }
1143 }
1144
1145 /* Update target distribution? */
1146 bool needToUpdateTargetDistribution =
1147 (params.eTarget != eawhtargetCONSTANT && params.isUpdateTargetStep(step));
1148
1149 /* In the initial stage, the histogram grows dynamically as a function of the number of coverings. */
1150 bool detectedCovering = false;
1151 if (inInitialStage())
1152 {
1153 detectedCovering =
1154 (params.isCheckCoveringStep(step)
1155 && isSamplingRegionCovered(params, dimParams, grid, commRecord, multiSimComm));
1156 }
1157
1158 /* The weighthistogram size after this update. */
1159 double newHistogramSize = histogramSize_.newHistogramSize(params, t, detectedCovering, points_,
1160 weightSumCovering_, fplog);
1161
1162 /* Make the update list. Usually we try to only update local points,
1163 * but if the update has non-trivial or non-deterministic effects
1164 * on non-local points a global update is needed. This is the case when:
1165 * 1) a covering occurred in the initial stage, leading to non-trivial
1166 * histogram rescaling factors; or
1167 * 2) the target distribution will be updated, since we don't make any
1168 * assumption on its form; or
1169 * 3) the AWH parameters are such that we never attempt to skip non-local
1170 * updates; or
1171 * 4) the free energy values have grown so large that a renormalization
1172 * is needed.
1173 */
1174 if (needToUpdateTargetDistribution || detectedCovering || !params.skipUpdates() || needToNormalizeFreeEnergy)
1175 {
1176 /* Global update, just add all points. */
1177 updateList->clear();
1178 for (size_t m = 0; m < points_.size(); m++)
1179 {
1180 if (points_[m].inTargetRegion())
1181 {
1182 updateList->push_back(m);
1183 }
1184 }
1185 }
1186
1187 /* Set histogram scale factors. */
1188 double weightHistScalingSkipped = 0;
1189 double logPmfsumScalingSkipped = 0;
1190 if (params.skipUpdates())
1191 {
1192 getSkippedUpdateHistogramScaleFactors(params, &weightHistScalingSkipped, &logPmfsumScalingSkipped);
1193 }
1194 double weightHistScalingNew;
1195 double logPmfsumScalingNew;
1196 setHistogramUpdateScaleFactors(params, newHistogramSize, histogramSize_.histogramSize(),
1197 &weightHistScalingNew, &logPmfsumScalingNew);
1198
1199 /* Update free energy and reference weight histogram for points in the update list. */
1200 for (int pointIndex : *updateList)
1201 {
1202 PointState* pointStateToUpdate = &points_[pointIndex];
1203
1204 /* Do updates from previous update steps that were skipped because this point was at that time non-local. */
1205 if (params.skipUpdates())
1206 {
1207 pointStateToUpdate->performPreviouslySkippedUpdates(params, histogramSize_.numUpdates(),
1208 weightHistScalingSkipped,
1209 logPmfsumScalingSkipped);
1210 }
1211
1212 /* Now do an update with new sampling data. */
1213 pointStateToUpdate->updateWithNewSampling(params, histogramSize_.numUpdates(),
1214 weightHistScalingNew, logPmfsumScalingNew);
1215 }
1216
1217 /* Only update the histogram size after we are done with the local point updates */
1218 histogramSize_.setHistogramSize(newHistogramSize, weightHistScalingNew);
1219
1220 if (needToNormalizeFreeEnergy)
1221 {
1222 normalizeFreeEnergyAndPmfSum(&points_);
1223 }
1224
1225 if (needToUpdateTargetDistribution)
1226 {
1227 /* The target distribution is always updated for all points at once. */
1228 updateTargetDistribution(points_, params);
1229 }
1230
1231 /* Update the bias. The bias is updated separately and last since it simply a function of
1232 the free energy and the target distribution and we want to avoid doing extra work. */
1233 for (int pointIndex : *updateList)
1234 {
1235 points_[pointIndex].updateBias();
1236 }
1237
1238 /* Increase the update counter. */
1239 histogramSize_.incrementNumUpdates();
1240 }
1241
updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams> & dimParams,const BiasGrid & grid,gmx::ArrayRef<const double> neighborLambdaEnergies,std::vector<double,AlignedAllocator<double>> * weight) const1242 double BiasState::updateProbabilityWeightsAndConvolvedBias(const std::vector<DimParams>& dimParams,
1243 const BiasGrid& grid,
1244 gmx::ArrayRef<const double> neighborLambdaEnergies,
1245 std::vector<double, AlignedAllocator<double>>* weight) const
1246 {
1247 /* Only neighbors of the current coordinate value will have a non-negligible chance of getting sampled */
1248 const std::vector<int>& neighbors = grid.point(coordState_.gridpointIndex()).neighbor;
1249
1250 #if GMX_SIMD_HAVE_DOUBLE
1251 typedef SimdDouble PackType;
1252 constexpr int packSize = GMX_SIMD_DOUBLE_WIDTH;
1253 #else
1254 typedef double PackType;
1255 constexpr int packSize = 1;
1256 #endif
1257 /* Round the size of the weight array up to packSize */
1258 const int weightSize = ((neighbors.size() + packSize - 1) / packSize) * packSize;
1259 weight->resize(weightSize);
1260
1261 double* gmx_restrict weightData = weight->data();
1262 PackType weightSumPack(0.0);
1263 for (size_t i = 0; i < neighbors.size(); i += packSize)
1264 {
1265 for (size_t n = i; n < i + packSize; n++)
1266 {
1267 if (n < neighbors.size())
1268 {
1269 const int neighbor = neighbors[n];
1270 (*weight)[n] = biasedLogWeightFromPoint(
1271 dimParams, points_, grid, neighbor, points_[neighbor].bias(),
1272 coordState_.coordValue(), neighborLambdaEnergies, coordState_.gridpointIndex());
1273 }
1274 else
1275 {
1276 /* Pad with values that don't affect the result */
1277 (*weight)[n] = detail::c_largeNegativeExponent;
1278 }
1279 }
1280 PackType weightPack = load<PackType>(weightData + i);
1281 weightPack = gmx::exp(weightPack);
1282 weightSumPack = weightSumPack + weightPack;
1283 store(weightData + i, weightPack);
1284 }
1285 /* Sum of probability weights */
1286 double weightSum = reduce(weightSumPack);
1287 GMX_RELEASE_ASSERT(weightSum > 0,
1288 "zero probability weight when updating AWH probability weights.");
1289
1290 /* Normalize probabilities to sum to 1 */
1291 double invWeightSum = 1 / weightSum;
1292
1293 /* When there is a free energy lambda state axis remove the convolved contributions along that
1294 * axis from the total bias. This must be done after calculating invWeightSum (since weightSum
1295 * will be modified), but before normalizing the weights (below). */
1296 if (grid.hasLambdaAxis())
1297 {
1298 /* If there is only one axis the bias will not be convolved in any dimension. */
1299 if (grid.axis().size() == 1)
1300 {
1301 weightSum = gmx::exp(points_[coordState_.gridpointIndex()].bias());
1302 }
1303 else
1304 {
1305 for (size_t i = 0; i < neighbors.size(); i++)
1306 {
1307 const int neighbor = neighbors[i];
1308 if (pointsHaveDifferentLambda(grid, coordState_.gridpointIndex(), neighbor))
1309 {
1310 weightSum -= weightData[i];
1311 }
1312 }
1313 /* Subtracting potentially very large values above may lead to rounding errors, causing
1314 * negative weightSum, even in double precision. */
1315 weightSum = std::max(weightSum, GMX_DOUBLE_MIN);
1316 }
1317 }
1318
1319 for (double& w : *weight)
1320 {
1321 w *= invWeightSum;
1322 }
1323
1324 /* Return the convolved bias */
1325 return std::log(weightSum);
1326 }
1327
calcConvolvedBias(const std::vector<DimParams> & dimParams,const BiasGrid & grid,const awh_dvec & coordValue) const1328 double BiasState::calcConvolvedBias(const std::vector<DimParams>& dimParams,
1329 const BiasGrid& grid,
1330 const awh_dvec& coordValue) const
1331 {
1332 int point = grid.nearestIndex(coordValue);
1333 const GridPoint& gridPoint = grid.point(point);
1334
1335 /* Sum the probability weights from the neighborhood of the given point */
1336 double weightSum = 0;
1337 for (int neighbor : gridPoint.neighbor)
1338 {
1339 /* No convolution is required along the lambda dimension. */
1340 if (pointsHaveDifferentLambda(grid, point, neighbor))
1341 {
1342 continue;
1343 }
1344 double logWeight = biasedLogWeightFromPoint(dimParams, points_, grid, neighbor,
1345 points_[neighbor].bias(), coordValue, {}, point);
1346 weightSum += std::exp(logWeight);
1347 }
1348
1349 /* Returns -GMX_FLOAT_MAX if no neighboring points were in the target region. */
1350 return (weightSum > 0) ? std::log(weightSum) : -GMX_FLOAT_MAX;
1351 }
1352
sampleProbabilityWeights(const BiasGrid & grid,gmx::ArrayRef<const double> probWeightNeighbor)1353 void BiasState::sampleProbabilityWeights(const BiasGrid& grid, gmx::ArrayRef<const double> probWeightNeighbor)
1354 {
1355 const std::vector<int>& neighbor = grid.point(coordState_.gridpointIndex()).neighbor;
1356
1357 /* Save weights for next update */
1358 for (size_t n = 0; n < neighbor.size(); n++)
1359 {
1360 points_[neighbor[n]].increaseWeightSumIteration(probWeightNeighbor[n]);
1361 }
1362
1363 /* Update the local update range. Two corner points define this rectangular
1364 * domain. We need to choose two new corner points such that the new domain
1365 * contains both the old update range and the current neighborhood.
1366 * In the simplest case when an update is performed every sample,
1367 * the update range would simply equal the current neighborhood.
1368 */
1369 int neighborStart = neighbor[0];
1370 int neighborLast = neighbor[neighbor.size() - 1];
1371 for (int d = 0; d < grid.numDimensions(); d++)
1372 {
1373 int origin = grid.point(neighborStart).index[d];
1374 int last = grid.point(neighborLast).index[d];
1375
1376 if (origin > last)
1377 {
1378 /* Unwrap if wrapped around the boundary (only happens for periodic
1379 * boundaries). This has been already for the stored index interval.
1380 */
1381 /* TODO: what we want to do is to find the smallest the update
1382 * interval that contains all points that need to be updated.
1383 * This amounts to combining two intervals, the current
1384 * [origin, end] update interval and the new touched neighborhood
1385 * into a new interval that contains all points from both the old
1386 * intervals.
1387 *
1388 * For periodic boundaries it becomes slightly more complicated
1389 * than for closed boundaries because then it needs not be
1390 * true that origin < end (so one can't simply relate the origin/end
1391 * in the min()/max() below). The strategy here is to choose the
1392 * origin closest to a reference point (index 0) and then unwrap
1393 * the end index if needed and choose the largest end index.
1394 * This ensures that both intervals are in the new interval
1395 * but it's not necessarily the smallest.
1396 * Currently we solve this by going through each possibility
1397 * and checking them.
1398 */
1399 last += grid.axis(d).numPointsInPeriod();
1400 }
1401
1402 originUpdatelist_[d] = std::min(originUpdatelist_[d], origin);
1403 endUpdatelist_[d] = std::max(endUpdatelist_[d], last);
1404 }
1405 }
1406
sampleCoordAndPmf(const std::vector<DimParams> & dimParams,const BiasGrid & grid,gmx::ArrayRef<const double> probWeightNeighbor,double convolvedBias)1407 void BiasState::sampleCoordAndPmf(const std::vector<DimParams>& dimParams,
1408 const BiasGrid& grid,
1409 gmx::ArrayRef<const double> probWeightNeighbor,
1410 double convolvedBias)
1411 {
1412 /* Sampling-based deconvolution extracting the PMF.
1413 * Update the PMF histogram with the current coordinate value.
1414 *
1415 * Because of the finite width of the harmonic potential, the free energy
1416 * defined for each coordinate point does not exactly equal that of the
1417 * actual coordinate, the PMF. However, the PMF can be estimated by applying
1418 * the relation exp(-PMF) = exp(-bias_convolved)*P_biased/Z, i.e. by keeping a
1419 * reweighted histogram of the coordinate value. Strictly, this relies on
1420 * the unknown normalization constant Z being either constant or known. Here,
1421 * neither is true except in the long simulation time limit. Empirically however,
1422 * it works (mainly because how the PMF histogram is rescaled).
1423 */
1424
1425 const int gridPointIndex = coordState_.gridpointIndex();
1426 const std::optional<int> lambdaAxisIndex = grid.lambdaAxisIndex();
1427
1428 /* Update the PMF of points along a lambda axis with their bias. */
1429 if (lambdaAxisIndex)
1430 {
1431 const std::vector<int>& neighbors = grid.point(gridPointIndex).neighbor;
1432
1433 std::vector<double> lambdaMarginalDistribution =
1434 calculateFELambdaMarginalDistribution(grid, neighbors, probWeightNeighbor);
1435
1436 awh_dvec coordValueAlongLambda = { coordState_.coordValue()[0], coordState_.coordValue()[1],
1437 coordState_.coordValue()[2], coordState_.coordValue()[3] };
1438 for (size_t i = 0; i < neighbors.size(); i++)
1439 {
1440 const int neighbor = neighbors[i];
1441 double bias;
1442 if (pointsAlongLambdaAxis(grid, gridPointIndex, neighbor))
1443 {
1444 const double neighborLambda = grid.point(neighbor).coordValue[lambdaAxisIndex.value()];
1445 if (neighbor == gridPointIndex)
1446 {
1447 bias = convolvedBias;
1448 }
1449 else
1450 {
1451 coordValueAlongLambda[lambdaAxisIndex.value()] = neighborLambda;
1452 bias = calcConvolvedBias(dimParams, grid, coordValueAlongLambda);
1453 }
1454
1455 const double probWeight = lambdaMarginalDistribution[neighborLambda];
1456 const double weightedBias = bias - std::log(std::max(probWeight, GMX_DOUBLE_MIN)); // avoid log(0)
1457
1458 if (neighbor == gridPointIndex && grid.covers(coordState_.coordValue()))
1459 {
1460 points_[neighbor].samplePmf(weightedBias);
1461 }
1462 else
1463 {
1464 points_[neighbor].updatePmfUnvisited(weightedBias);
1465 }
1466 }
1467 }
1468 }
1469 else
1470 {
1471 /* Only save coordinate data that is in range (the given index is always
1472 * in range even if the coordinate value is not).
1473 */
1474 if (grid.covers(coordState_.coordValue()))
1475 {
1476 /* Save PMF sum and keep a histogram of the sampled coordinate values */
1477 points_[gridPointIndex].samplePmf(convolvedBias);
1478 }
1479 }
1480
1481 /* Save probability weights for the update */
1482 sampleProbabilityWeights(grid, probWeightNeighbor);
1483 }
1484
initHistoryFromState(AwhBiasHistory * biasHistory) const1485 void BiasState::initHistoryFromState(AwhBiasHistory* biasHistory) const
1486 {
1487 biasHistory->pointState.resize(points_.size());
1488 }
1489
updateHistory(AwhBiasHistory * biasHistory,const BiasGrid & grid) const1490 void BiasState::updateHistory(AwhBiasHistory* biasHistory, const BiasGrid& grid) const
1491 {
1492 GMX_RELEASE_ASSERT(biasHistory->pointState.size() == points_.size(),
1493 "The AWH history setup does not match the AWH state.");
1494
1495 AwhBiasStateHistory* stateHistory = &biasHistory->state;
1496 stateHistory->umbrellaGridpoint = coordState_.umbrellaGridpoint();
1497
1498 for (size_t m = 0; m < biasHistory->pointState.size(); m++)
1499 {
1500 AwhPointStateHistory* psh = &biasHistory->pointState[m];
1501
1502 points_[m].storeState(psh);
1503
1504 psh->weightsum_covering = weightSumCovering_[m];
1505 }
1506
1507 histogramSize_.storeState(stateHistory);
1508
1509 stateHistory->origin_index_updatelist = multiDimGridIndexToLinear(grid, originUpdatelist_);
1510 stateHistory->end_index_updatelist = multiDimGridIndexToLinear(grid, endUpdatelist_);
1511 }
1512
restoreFromHistory(const AwhBiasHistory & biasHistory,const BiasGrid & grid)1513 void BiasState::restoreFromHistory(const AwhBiasHistory& biasHistory, const BiasGrid& grid)
1514 {
1515 const AwhBiasStateHistory& stateHistory = biasHistory.state;
1516
1517 coordState_.restoreFromHistory(stateHistory);
1518
1519 if (biasHistory.pointState.size() != points_.size())
1520 {
1521 GMX_THROW(
1522 InvalidInputError("Bias grid size in checkpoint and simulation do not match. "
1523 "Likely you provided a checkpoint from a different simulation."));
1524 }
1525 for (size_t m = 0; m < points_.size(); m++)
1526 {
1527 points_[m].setFromHistory(biasHistory.pointState[m]);
1528 }
1529
1530 for (size_t m = 0; m < weightSumCovering_.size(); m++)
1531 {
1532 weightSumCovering_[m] = biasHistory.pointState[m].weightsum_covering;
1533 }
1534
1535 histogramSize_.restoreFromHistory(stateHistory);
1536
1537 linearGridindexToMultiDim(grid, stateHistory.origin_index_updatelist, originUpdatelist_);
1538 linearGridindexToMultiDim(grid, stateHistory.end_index_updatelist, endUpdatelist_);
1539 }
1540
broadcast(const t_commrec * commRecord)1541 void BiasState::broadcast(const t_commrec* commRecord)
1542 {
1543 gmx_bcast(sizeof(coordState_), &coordState_, commRecord->mpi_comm_mygroup);
1544
1545 gmx_bcast(points_.size() * sizeof(PointState), points_.data(), commRecord->mpi_comm_mygroup);
1546
1547 gmx_bcast(weightSumCovering_.size() * sizeof(double), weightSumCovering_.data(),
1548 commRecord->mpi_comm_mygroup);
1549
1550 gmx_bcast(sizeof(histogramSize_), &histogramSize_, commRecord->mpi_comm_mygroup);
1551 }
1552
setFreeEnergyToConvolvedPmf(const std::vector<DimParams> & dimParams,const BiasGrid & grid)1553 void BiasState::setFreeEnergyToConvolvedPmf(const std::vector<DimParams>& dimParams, const BiasGrid& grid)
1554 {
1555 std::vector<float> convolvedPmf;
1556
1557 calcConvolvedPmf(dimParams, grid, &convolvedPmf);
1558
1559 for (size_t m = 0; m < points_.size(); m++)
1560 {
1561 points_[m].setFreeEnergy(convolvedPmf[m]);
1562 }
1563 }
1564
1565 /*! \brief
1566 * Count trailing data rows containing only zeros.
1567 *
1568 * \param[in] data 2D data array.
1569 * \param[in] numRows Number of rows in array.
1570 * \param[in] numColumns Number of cols in array.
1571 * \returns the number of trailing zero rows.
1572 */
countTrailingZeroRows(const double * const * data,int numRows,int numColumns)1573 static int countTrailingZeroRows(const double* const* data, int numRows, int numColumns)
1574 {
1575 int numZeroRows = 0;
1576 for (int m = numRows - 1; m >= 0; m--)
1577 {
1578 bool rowIsZero = true;
1579 for (int d = 0; d < numColumns; d++)
1580 {
1581 if (data[d][m] != 0)
1582 {
1583 rowIsZero = false;
1584 break;
1585 }
1586 }
1587
1588 if (!rowIsZero)
1589 {
1590 /* At a row with non-zero data */
1591 break;
1592 }
1593 else
1594 {
1595 /* Still at a zero data row, keep checking rows higher up. */
1596 numZeroRows++;
1597 }
1598 }
1599
1600 return numZeroRows;
1601 }
1602
1603 /*! \brief
1604 * Initializes the PMF and target with data read from an input table.
1605 *
1606 * \param[in] dimParams The dimension parameters.
1607 * \param[in] grid The grid.
1608 * \param[in] filename The filename to read PMF and target from.
1609 * \param[in] numBias Number of biases.
1610 * \param[in] biasIndex The index of the bias.
1611 * \param[in,out] pointState The state of the points in this bias.
1612 */
readUserPmfAndTargetDistribution(const std::vector<DimParams> & dimParams,const BiasGrid & grid,const std::string & filename,int numBias,int biasIndex,std::vector<PointState> * pointState)1613 static void readUserPmfAndTargetDistribution(const std::vector<DimParams>& dimParams,
1614 const BiasGrid& grid,
1615 const std::string& filename,
1616 int numBias,
1617 int biasIndex,
1618 std::vector<PointState>* pointState)
1619 {
1620 /* Read the PMF and target distribution.
1621 From the PMF, the convolved PMF, or the reference value free energy, can be calculated
1622 base on the force constant. The free energy and target together determine the bias.
1623 */
1624 std::string filenameModified(filename);
1625 if (numBias > 1)
1626 {
1627 size_t n = filenameModified.rfind('.');
1628 GMX_RELEASE_ASSERT(n != std::string::npos,
1629 "The filename should contain an extension starting with .");
1630 filenameModified.insert(n, formatString("%d", biasIndex));
1631 }
1632
1633 std::string correctFormatMessage = formatString(
1634 "%s is expected in the following format. "
1635 "The first ndim column(s) should contain the coordinate values for each point, "
1636 "each column containing values of one dimension (in ascending order). "
1637 "For a multidimensional coordinate, points should be listed "
1638 "in the order obtained by traversing lower dimensions first. "
1639 "E.g. for two-dimensional grid of size nxn: "
1640 "(1, 1), (1, 2),..., (1, n), (2, 1), (2, 2), ..., , (n, n - 1), (n, n). "
1641 "Column ndim + 1 should contain the PMF value for each coordinate value. "
1642 "The target distribution values should be in column ndim + 2 or column ndim + 5. "
1643 "Make sure the input file ends with a new line but has no trailing new lines.",
1644 filename.c_str());
1645 gmx::TextLineWrapper wrapper;
1646 wrapper.settings().setLineLength(c_linewidth);
1647 correctFormatMessage = wrapper.wrapToString(correctFormatMessage);
1648
1649 double** data;
1650 int numColumns;
1651 int numRows = read_xvg(filenameModified.c_str(), &data, &numColumns);
1652
1653 /* Check basic data properties here. BiasGrid takes care of more complicated things. */
1654
1655 if (numRows <= 0)
1656 {
1657 std::string mesg = gmx::formatString("%s is empty!.\n\n%s", filename.c_str(),
1658 correctFormatMessage.c_str());
1659 GMX_THROW(InvalidInputError(mesg));
1660 }
1661
1662 /* Less than 2 points is not useful for PMF or target. */
1663 if (numRows < 2)
1664 {
1665 std::string mesg = gmx::formatString(
1666 "%s contains too few data points (%d)."
1667 "The minimum number of points is 2.",
1668 filename.c_str(), numRows);
1669 GMX_THROW(InvalidInputError(mesg));
1670 }
1671
1672 /* Make sure there are enough columns of data.
1673
1674 Two formats are allowed. Either with columns {coords, PMF, target} or
1675 {coords, PMF, x, y, z, target, ...}. The latter format is allowed since that
1676 is how AWH output is written (x, y, z being other AWH variables). For this format,
1677 trailing columns are ignored.
1678 */
1679 int columnIndexTarget;
1680 int numColumnsMin = dimParams.size() + 2;
1681 int columnIndexPmf = dimParams.size();
1682 if (numColumns == numColumnsMin)
1683 {
1684 columnIndexTarget = columnIndexPmf + 1;
1685 }
1686 else
1687 {
1688 columnIndexTarget = columnIndexPmf + 4;
1689 }
1690
1691 if (numColumns < numColumnsMin)
1692 {
1693 std::string mesg = gmx::formatString(
1694 "The number of columns in %s should be at least %d."
1695 "\n\n%s",
1696 filename.c_str(), numColumnsMin, correctFormatMessage.c_str());
1697 GMX_THROW(InvalidInputError(mesg));
1698 }
1699
1700 /* read_xvg can give trailing zero data rows for trailing new lines in the input. We allow 1 zero row,
1701 since this could be real data. But multiple trailing zero rows cannot correspond to valid data. */
1702 int numZeroRows = countTrailingZeroRows(data, numRows, numColumns);
1703 if (numZeroRows > 1)
1704 {
1705 std::string mesg = gmx::formatString(
1706 "Found %d trailing zero data rows in %s. Please remove trailing empty lines and "
1707 "try again.",
1708 numZeroRows, filename.c_str());
1709 GMX_THROW(InvalidInputError(mesg));
1710 }
1711
1712 /* Convert from user units to internal units before sending the data of to grid. */
1713 for (size_t d = 0; d < dimParams.size(); d++)
1714 {
1715 double scalingFactor = dimParams[d].scaleUserInputToInternal(1);
1716 if (scalingFactor == 1)
1717 {
1718 continue;
1719 }
1720 for (size_t m = 0; m < pointState->size(); m++)
1721 {
1722 data[d][m] *= scalingFactor;
1723 }
1724 }
1725
1726 /* Get a data point for each AWH grid point so that they all get data. */
1727 std::vector<int> gridIndexToDataIndex(grid.numPoints());
1728 mapGridToDataGrid(&gridIndexToDataIndex, data, numRows, filename, grid, correctFormatMessage);
1729
1730 /* Extract the data for each grid point.
1731 * We check if the target distribution is zero for all points.
1732 */
1733 bool targetDistributionIsZero = true;
1734 for (size_t m = 0; m < pointState->size(); m++)
1735 {
1736 (*pointState)[m].setLogPmfSum(-data[columnIndexPmf][gridIndexToDataIndex[m]]);
1737 double target = data[columnIndexTarget][gridIndexToDataIndex[m]];
1738
1739 /* Check if the values are allowed. */
1740 if (target < 0)
1741 {
1742 std::string mesg = gmx::formatString(
1743 "Target distribution weight at point %zu (%g) in %s is negative.", m, target,
1744 filename.c_str());
1745 GMX_THROW(InvalidInputError(mesg));
1746 }
1747 if (target > 0)
1748 {
1749 targetDistributionIsZero = false;
1750 }
1751 (*pointState)[m].setTargetConstantWeight(target);
1752 }
1753
1754 if (targetDistributionIsZero)
1755 {
1756 std::string mesg =
1757 gmx::formatString("The target weights given in column %d in %s are all 0",
1758 columnIndexTarget, filename.c_str());
1759 GMX_THROW(InvalidInputError(mesg));
1760 }
1761
1762 /* Free the arrays. */
1763 for (int m = 0; m < numColumns; m++)
1764 {
1765 sfree(data[m]);
1766 }
1767 sfree(data);
1768 }
1769
normalizePmf(int numSharingSims)1770 void BiasState::normalizePmf(int numSharingSims)
1771 {
1772 /* The normalization of the PMF estimate matters because it determines how big effect the next sample has.
1773 Approximately (for large enough force constant) we should have:
1774 sum_x(exp(-pmf(x)) = nsamples*sum_xref(exp(-f(xref)).
1775 */
1776
1777 /* Calculate the normalization factor, i.e. divide by the pmf sum, multiply by the number of samples and the f sum */
1778 double expSumPmf = 0;
1779 double expSumF = 0;
1780 for (const PointState& pointState : points_)
1781 {
1782 if (pointState.inTargetRegion())
1783 {
1784 expSumPmf += std::exp(pointState.logPmfSum());
1785 expSumF += std::exp(-pointState.freeEnergy());
1786 }
1787 }
1788 double numSamples = histogramSize_.histogramSize() / numSharingSims;
1789
1790 /* Renormalize */
1791 double logRenorm = std::log(numSamples * expSumF / expSumPmf);
1792 for (PointState& pointState : points_)
1793 {
1794 if (pointState.inTargetRegion())
1795 {
1796 pointState.setLogPmfSum(pointState.logPmfSum() + logRenorm);
1797 }
1798 }
1799 }
1800
initGridPointState(const AwhBiasParams & awhBiasParams,const std::vector<DimParams> & dimParams,const BiasGrid & grid,const BiasParams & params,const std::string & filename,int numBias)1801 void BiasState::initGridPointState(const AwhBiasParams& awhBiasParams,
1802 const std::vector<DimParams>& dimParams,
1803 const BiasGrid& grid,
1804 const BiasParams& params,
1805 const std::string& filename,
1806 int numBias)
1807 {
1808 /* Modify PMF, free energy and the constant target distribution factor
1809 * to user input values if there is data given.
1810 */
1811 if (awhBiasParams.bUserData)
1812 {
1813 readUserPmfAndTargetDistribution(dimParams, grid, filename, numBias, params.biasIndex, &points_);
1814 setFreeEnergyToConvolvedPmf(dimParams, grid);
1815 }
1816
1817 /* The local Boltzmann distribution is special because the target distribution is updated as a function of the reference weighthistogram. */
1818 GMX_RELEASE_ASSERT(params.eTarget != eawhtargetLOCALBOLTZMANN || points_[0].weightSumRef() != 0,
1819 "AWH reference weight histogram not initialized properly with local "
1820 "Boltzmann target distribution.");
1821
1822 updateTargetDistribution(points_, params);
1823
1824 for (PointState& pointState : points_)
1825 {
1826 if (pointState.inTargetRegion())
1827 {
1828 pointState.updateBias();
1829 }
1830 else
1831 {
1832 /* Note that for zero target this is a value that represents -infinity but should not be used for biasing. */
1833 pointState.setTargetToZero();
1834 }
1835 }
1836
1837 /* Set the initial reference weighthistogram. */
1838 const double histogramSize = histogramSize_.histogramSize();
1839 for (auto& pointState : points_)
1840 {
1841 pointState.setInitialReferenceWeightHistogram(histogramSize);
1842 }
1843
1844 /* Make sure the pmf is normalized consistently with the histogram size.
1845 Note: the target distribution and free energy need to be set here. */
1846 normalizePmf(params.numSharedUpdate);
1847 }
1848
BiasState(const AwhBiasParams & awhBiasParams,double histogramSizeInitial,const std::vector<DimParams> & dimParams,const BiasGrid & grid)1849 BiasState::BiasState(const AwhBiasParams& awhBiasParams,
1850 double histogramSizeInitial,
1851 const std::vector<DimParams>& dimParams,
1852 const BiasGrid& grid) :
1853 coordState_(awhBiasParams, dimParams, grid),
1854 points_(grid.numPoints()),
1855 weightSumCovering_(grid.numPoints()),
1856 histogramSize_(awhBiasParams, histogramSizeInitial)
1857 {
1858 /* The minimum and maximum multidimensional point indices that are affected by the next update */
1859 for (size_t d = 0; d < dimParams.size(); d++)
1860 {
1861 int index = grid.point(coordState_.gridpointIndex()).index[d];
1862 originUpdatelist_[d] = index;
1863 endUpdatelist_[d] = index;
1864 }
1865 }
1866
1867 } // namespace gmx
1868