1 /*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
8 *
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
13 *
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 *
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
31 *
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
34 */
35 /*! \internal \file
36 *
37 * \brief Implements Leap-Frog using SYCL
38 *
39 * This file contains implementation of basic Leap-Frog integrator
40 * using SYCL, including class initialization, data-structures management
41 * and GPU kernel.
42 *
43 * \author Artem Zhmurov <zhmurov@gmail.com>
44 * \author Andrey Alekseenko <al42and@gmail.com>
45 *
46 * \ingroup module_mdlib
47 */
48 #include "gmxpre.h"
49
50 #include "gromacs/gpu_utils/devicebuffer.h"
51 #include "gromacs/gpu_utils/gmxsycl.h"
52 #include "gromacs/math/vec.h"
53 #include "gromacs/mdlib/leapfrog_gpu.h"
54 #include "gromacs/mdtypes/group.h"
55 #include "gromacs/utility/arrayref.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/template_mp.h"
58
59 namespace gmx
60 {
61
62 using cl::sycl::access::mode;
63
64 /*! \brief Main kernel for the Leap-Frog integrator.
65 *
66 * The coordinates and velocities are updated on the GPU. Also saves the intermediate values of the coordinates for
67 * further use in constraints.
68 *
69 * Each GPU thread works with a single particle.
70 *
71 * \tparam numTempScaleValues The number of different T-couple values.
72 * \tparam velocityScaling Type of the Parrinello-Rahman velocity rescaling.
73 * \param cgh SYCL's command group handler.
74 * \param[in,out] a_x Coordinates to update upon integration.
75 * \param[out] a_xp A copy of the coordinates before the integration (for constraints).
76 * \param[in,out] a_v Velocities to update.
77 * \param[in] a_f Atomic forces.
78 * \param[in] a_inverseMasses Reciprocal masses.
79 * \param[in] dt Timestep.
80 * \param[in] a_lambdas Temperature scaling factors (one per group).
81 * \param[in] a_tempScaleGroups Mapping of atoms into groups.
82 * \param[in] prVelocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix
83 */
84 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
leapFrogKernel(cl::sycl::handler & cgh,DeviceAccessor<float3,mode::read_write> a_x,DeviceAccessor<float3,mode::discard_write> a_xp,DeviceAccessor<float3,mode::read_write> a_v,DeviceAccessor<float3,mode::read> a_f,DeviceAccessor<float,mode::read> a_inverseMasses,float dt,OptionalAccessor<float,mode::read,numTempScaleValues!=NumTempScaleValues::None> a_lambdas,OptionalAccessor<unsigned short,mode::read,numTempScaleValues==NumTempScaleValues::Multiple> a_tempScaleGroups,float3 prVelocityScalingMatrixDiagonal)85 auto leapFrogKernel(
86 cl::sycl::handler& cgh,
87 DeviceAccessor<float3, mode::read_write> a_x,
88 DeviceAccessor<float3, mode::discard_write> a_xp,
89 DeviceAccessor<float3, mode::read_write> a_v,
90 DeviceAccessor<float3, mode::read> a_f,
91 DeviceAccessor<float, mode::read> a_inverseMasses,
92 float dt,
93 OptionalAccessor<float, mode::read, numTempScaleValues != NumTempScaleValues::None> a_lambdas,
94 OptionalAccessor<unsigned short, mode::read, numTempScaleValues == NumTempScaleValues::Multiple> a_tempScaleGroups,
95 float3 prVelocityScalingMatrixDiagonal)
96 {
97 cgh.require(a_x);
98 cgh.require(a_xp);
99 cgh.require(a_v);
100 cgh.require(a_f);
101 cgh.require(a_inverseMasses);
102 if constexpr (numTempScaleValues != NumTempScaleValues::None)
103 {
104 cgh.require(a_lambdas);
105 }
106 if constexpr (numTempScaleValues == NumTempScaleValues::Multiple)
107 {
108 cgh.require(a_tempScaleGroups);
109 }
110
111 return [=](cl::sycl::id<1> itemIdx) {
112 const float3 x = a_x[itemIdx];
113 const float3 v = a_v[itemIdx];
114 const float3 f = a_f[itemIdx];
115 const float im = a_inverseMasses[itemIdx];
116 const float imdt = im * dt;
117
118 // Swapping places for xp and x so that the x will contain the updated coordinates and xp -
119 // the coordinates before update. This should be taken into account when (if) constraints
120 // are applied after the update: x and xp have to be passed to constraints in the 'wrong'
121 // order. See Issue #3727
122 a_xp[itemIdx] = x;
123
124 const float lambda = [=]() {
125 if constexpr (numTempScaleValues == NumTempScaleValues::None)
126 {
127 return 1.0F;
128 }
129 else if constexpr (numTempScaleValues == NumTempScaleValues::Single)
130 {
131 return a_lambdas[0];
132 }
133 else if constexpr (numTempScaleValues == NumTempScaleValues::Multiple)
134 {
135 const int tempScaleGroup = a_tempScaleGroups[itemIdx];
136 return a_lambdas[tempScaleGroup];
137 }
138 }();
139
140 const float3 prVelocityDelta = [=]() {
141 if constexpr (velocityScaling == VelocityScalingType::Diagonal)
142 {
143 return float3{ prVelocityScalingMatrixDiagonal[0] * v[0],
144 prVelocityScalingMatrixDiagonal[1] * v[1],
145 prVelocityScalingMatrixDiagonal[2] * v[2] };
146 }
147 else if constexpr (velocityScaling == VelocityScalingType::None)
148 {
149 return float3{ 0, 0, 0 };
150 }
151 }();
152
153 const float3 v_new = v * lambda - prVelocityDelta + f * imdt;
154 a_v[itemIdx] = v_new;
155 a_x[itemIdx] = x + v_new * dt;
156 };
157 }
158
159 // SYCL 1.2.1 requires providing a unique type for a kernel. Should not be needed for SYCL2020.
160 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
161 class LeapFrogKernelName;
162
163 template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling, class... Args>
launchLeapFrogKernel(const DeviceStream & deviceStream,int numAtoms,Args &&...args)164 static cl::sycl::event launchLeapFrogKernel(const DeviceStream& deviceStream, int numAtoms, Args&&... args)
165 {
166 // Should not be needed for SYCL2020.
167 using kernelNameType = LeapFrogKernelName<numTempScaleValues, velocityScaling>;
168
169 const cl::sycl::range<1> rangeAllAtoms(numAtoms);
170 cl::sycl::queue q = deviceStream.stream();
171
172 cl::sycl::event e = q.submit([&](cl::sycl::handler& cgh) {
173 auto kernel =
174 leapFrogKernel<numTempScaleValues, velocityScaling>(cgh, std::forward<Args>(args)...);
175 cgh.parallel_for<kernelNameType>(rangeAllAtoms, kernel);
176 });
177
178 return e;
179 }
180
getTempScalingType(bool doTemperatureScaling,int numTempScaleValues)181 static NumTempScaleValues getTempScalingType(bool doTemperatureScaling, int numTempScaleValues)
182 {
183 if (!doTemperatureScaling)
184 {
185 return NumTempScaleValues::None;
186 }
187 else if (numTempScaleValues == 1)
188 {
189 return NumTempScaleValues::Single;
190 }
191 else if (numTempScaleValues > 1)
192 {
193 return NumTempScaleValues::Multiple;
194 }
195 else
196 {
197 gmx_incons("Temperature coupling was requested with no temperature coupling groups.");
198 }
199 }
200
201 /*! \brief Select templated kernel and launch it. */
202 template<class... Args>
launchLeapFrogKernel(NumTempScaleValues tempScalingType,VelocityScalingType prVelocityScalingType,Args &&...args)203 static inline cl::sycl::event launchLeapFrogKernel(NumTempScaleValues tempScalingType,
204 VelocityScalingType prVelocityScalingType,
205 Args&&... args)
206 {
207 GMX_ASSERT(prVelocityScalingType == VelocityScalingType::None
208 || prVelocityScalingType == VelocityScalingType::Diagonal,
209 "Only isotropic Parrinello-Rahman pressure coupling is supported.");
210
211 return dispatchTemplatedFunction(
212 [&](auto tempScalingType_, auto prScalingType_) {
213 return launchLeapFrogKernel<tempScalingType_, prScalingType_>(std::forward<Args>(args)...);
214 },
215 tempScalingType, prVelocityScalingType);
216 }
217
integrate(DeviceBuffer<float3> d_x,DeviceBuffer<float3> d_xp,DeviceBuffer<float3> d_v,DeviceBuffer<float3> d_f,const real dt,const bool doTemperatureScaling,gmx::ArrayRef<const t_grp_tcstat> tcstat,const bool doParrinelloRahman,const float dtPressureCouple,const matrix prVelocityScalingMatrix)218 void LeapFrogGpu::integrate(DeviceBuffer<float3> d_x,
219 DeviceBuffer<float3> d_xp,
220 DeviceBuffer<float3> d_v,
221 DeviceBuffer<float3> d_f,
222 const real dt,
223 const bool doTemperatureScaling,
224 gmx::ArrayRef<const t_grp_tcstat> tcstat,
225 const bool doParrinelloRahman,
226 const float dtPressureCouple,
227 const matrix prVelocityScalingMatrix)
228 {
229 if (doTemperatureScaling)
230 {
231 GMX_ASSERT(checkDeviceBuffer(d_lambdas_, numTempScaleValues_),
232 "Number of temperature scaling factors changed since it was set for the "
233 "last time.");
234 { // Explicitly limiting the scope of host accessor. Not strictly necessary here.
235 auto ha_lambdas_ = d_lambdas_.buffer_->get_access<mode::discard_write>();
236 for (int i = 0; i < numTempScaleValues_; i++)
237 {
238 ha_lambdas_[i] = tcstat[i].lambda;
239 }
240 }
241 }
242 NumTempScaleValues tempVelocityScalingType =
243 getTempScalingType(doTemperatureScaling, numTempScaleValues_);
244
245 VelocityScalingType prVelocityScalingType = VelocityScalingType::None;
246 if (doParrinelloRahman)
247 {
248 prVelocityScalingType = VelocityScalingType::Diagonal;
249 GMX_ASSERT(prVelocityScalingMatrix[YY][XX] == 0 && prVelocityScalingMatrix[ZZ][XX] == 0
250 && prVelocityScalingMatrix[ZZ][YY] == 0 && prVelocityScalingMatrix[XX][YY] == 0
251 && prVelocityScalingMatrix[XX][ZZ] == 0 && prVelocityScalingMatrix[YY][ZZ] == 0,
252 "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
253 "in GPU version of Leap-Frog integrator.");
254 prVelocityScalingMatrixDiagonal_ =
255 dtPressureCouple
256 * float3{ prVelocityScalingMatrix[XX][XX], prVelocityScalingMatrix[YY][YY],
257 prVelocityScalingMatrix[ZZ][ZZ] };
258 }
259
260 launchLeapFrogKernel(tempVelocityScalingType, prVelocityScalingType, deviceStream_, numAtoms_,
261 d_x, d_xp, d_v, d_f, d_inverseMasses_, dt, d_lambdas_, d_tempScaleGroups_,
262 prVelocityScalingMatrixDiagonal_);
263 }
264
LeapFrogGpu(const DeviceContext & deviceContext,const DeviceStream & deviceStream)265 LeapFrogGpu::LeapFrogGpu(const DeviceContext& deviceContext, const DeviceStream& deviceStream) :
266 deviceContext_(deviceContext),
267 deviceStream_(deviceStream),
268 numAtoms_(0)
269 {
270 }
271
~LeapFrogGpu()272 LeapFrogGpu::~LeapFrogGpu()
273 {
274 freeDeviceBuffer(&d_inverseMasses_);
275 }
276
set(const int numAtoms,const real * inverseMasses,const int numTempScaleValues,const unsigned short * tempScaleGroups)277 void LeapFrogGpu::set(const int numAtoms,
278 const real* inverseMasses,
279 const int numTempScaleValues,
280 const unsigned short* tempScaleGroups)
281 {
282 numAtoms_ = numAtoms;
283 numTempScaleValues_ = numTempScaleValues;
284
285 reallocateDeviceBuffer(&d_inverseMasses_, numAtoms_, &numInverseMasses_,
286 &numInverseMassesAlloc_, deviceContext_);
287 copyToDeviceBuffer(&d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_,
288 GpuApiCallBehavior::Sync, nullptr);
289
290 // Temperature scale group map only used if there are more then one group
291 if (numTempScaleValues_ > 1)
292 {
293 reallocateDeviceBuffer(&d_tempScaleGroups_, numAtoms_, &numTempScaleGroups_,
294 &numTempScaleGroupsAlloc_, deviceContext_);
295 copyToDeviceBuffer(&d_tempScaleGroups_, tempScaleGroups, 0, numAtoms_, deviceStream_,
296 GpuApiCallBehavior::Sync, nullptr);
297 }
298
299 // If the temperature coupling is enabled, we need to make space for scaling factors
300 if (numTempScaleValues_ > 0)
301 {
302 reallocateDeviceBuffer(&d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_,
303 deviceContext_);
304 }
305 }
306
307 } // namespace gmx
308