1 /* 2 * This file is part of the GROMACS molecular simulation package. 3 * 4 * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team. 5 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by 6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, 7 * and including many others, as listed in the AUTHORS file in the 8 * top-level source directory and at http://www.gromacs.org. 9 * 10 * GROMACS is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Lesser General Public License 12 * as published by the Free Software Foundation; either version 2.1 13 * of the License, or (at your option) any later version. 14 * 15 * GROMACS is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Lesser General Public License for more details. 19 * 20 * You should have received a copy of the GNU Lesser General Public 21 * License along with GROMACS; if not, see 22 * http://www.gnu.org/licenses, or write to the Free Software Foundation, 23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 24 * 25 * If you want to redistribute modifications to GROMACS, please 26 * consider that scientific software is very special. Version 27 * control is crucial - bugs must be traceable. We will be happy to 28 * consider code for inclusion in the official distribution, but 29 * derived work must not be called official GROMACS. Details are found 30 * in the README & COPYING files - if they are missing, get the 31 * official version at http://www.gromacs.org. 32 * 33 * To help us fund GROMACS development, we humbly ask that you cite 34 * the research papers on the package. Check out http://www.gromacs.org. 35 */ 36 37 /*! \internal \file 38 * \brief 39 * Data types used internally in the nbnxm_ocl module. 40 * 41 * \author Anca Hamuraru <anca@streamcomputing.eu> 42 * \author Szilárd Páll <pszilard@kth.se> 43 * \ingroup module_nbnxm 44 */ 45 46 #ifndef GMX_NBNXM_NBNXM_OPENCL_TYPES_H 47 #define GMX_NBNXM_NBNXM_OPENCL_TYPES_H 48 49 #include "gromacs/gpu_utils/devicebuffer.h" 50 #include "gromacs/gpu_utils/gmxopencl.h" 51 #include "gromacs/gpu_utils/gputraits_ocl.h" 52 #include "gromacs/gpu_utils/oclutils.h" 53 #include "gromacs/mdtypes/interaction_const.h" 54 #include "gromacs/nbnxm/gpu_types_common.h" 55 #include "gromacs/nbnxm/nbnxm.h" 56 #include "gromacs/nbnxm/nbnxm_gpu.h" 57 #include "gromacs/nbnxm/pairlist.h" 58 #include "gromacs/utility/enumerationhelpers.h" 59 #include "gromacs/utility/fatalerror.h" 60 #include "gromacs/utility/real.h" 61 62 #include "nbnxm_ocl_consts.h" 63 64 struct gmx_wallclock_gpu_nbnxn_t; 65 66 /* kernel does #include "gromacs/math/utilities.h" */ 67 /* Move the actual useful stuff here: */ 68 69 //! Define 1/sqrt(pi) 70 #define M_FLOAT_1_SQRTPI 0.564189583547756f 71 72 /*! \brief Constants for platform-dependent defaults for the prune kernel's j4 processing concurrency. 73 * 74 * Initialized using macros that can be overridden at compile-time (using #GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY). 75 */ 76 /*! @{ */ 77 const int c_oclPruneKernelJ4ConcurrencyDEFAULT = GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY_DEFAULT; 78 /*! @} */ 79 80 /*! \brief Pruning kernel flavors. 81 * 82 * The values correspond to the first call of the pruning post-list generation 83 * and the rolling pruning, respectively. 84 */ 85 enum ePruneKind 86 { 87 epruneFirst, 88 epruneRolling, 89 ePruneNR 90 }; 91 92 /*! \internal 93 * \brief Staging area for temporary data downloaded from the GPU. 94 * 95 * The energies/shift forces get downloaded here first, before getting added 96 * to the CPU-side aggregate values. 97 */ 98 struct nb_staging_t 99 { 100 //! LJ energy 101 float* e_lj = nullptr; 102 //! electrostatic energy 103 float* e_el = nullptr; 104 //! float3 buffer with shift forces 105 float (*fshift)[3] = nullptr; 106 }; 107 108 /*! \internal 109 * \brief Nonbonded atom data - both inputs and outputs. 110 */ 111 typedef struct cl_atomdata 112 { 113 //! number of atoms 114 int natoms; 115 //! number of local atoms 116 int natoms_local; 117 //! allocation size for the atom data (xq, f) 118 int nalloc; 119 120 //! float4 buffer with atom coordinates + charges, size natoms 121 DeviceBuffer<float> xq; 122 123 //! float3 buffer with force output array, size natoms 124 DeviceBuffer<float> f; 125 126 //! LJ energy output, size 1 127 DeviceBuffer<float> e_lj; 128 //! Electrostatics energy input, size 1 129 DeviceBuffer<float> e_el; 130 131 //! float3 buffer with shift forces 132 DeviceBuffer<float> fshift; 133 134 //! number of atom types 135 int ntypes; 136 //! int buffer with atom type indices, size natoms 137 DeviceBuffer<int> atom_types; 138 //! float2 buffer with sqrt(c6),sqrt(c12), size natoms 139 DeviceBuffer<float> lj_comb; 140 141 //! float3 buffer with shifts values 142 DeviceBuffer<float> shift_vec; 143 144 //! true if the shift vector has been uploaded 145 bool bShiftVecUploaded; 146 } cl_atomdata_t; 147 148 /*! \internal 149 * \brief Data structure shared between the OpenCL device code and OpenCL host code 150 * 151 * Must not contain OpenCL objects (buffers) 152 * TODO: review, improve */ 153 typedef struct cl_nbparam_params 154 { 155 156 //! type of electrostatics, takes values from #eelType 157 int eeltype; 158 //! type of VdW impl., takes values from #evdwType 159 int vdwtype; 160 161 //! charge multiplication factor 162 float epsfac; 163 //! Reaction-field/plain cutoff electrostatics const. 164 float c_rf; 165 //! Reaction-field electrostatics constant 166 float two_k_rf; 167 //! Ewald/PME parameter 168 float ewald_beta; 169 //! Ewald/PME correction term substracted from the direct-space potential 170 float sh_ewald; 171 //! LJ-Ewald/PME correction term added to the correction potential 172 float sh_lj_ewald; 173 //! LJ-Ewald/PME coefficient 174 float ewaldcoeff_lj; 175 176 //! Coulomb cut-off squared 177 float rcoulomb_sq; 178 179 //! VdW cut-off squared 180 float rvdw_sq; 181 //! VdW switched cut-off 182 float rvdw_switch; 183 //! Full, outer pair-list cut-off squared 184 float rlistOuter_sq; 185 //! Inner, dynamic pruned pair-list cut-off squared 186 float rlistInner_sq; 187 188 //! VdW shift dispersion constants 189 shift_consts_t dispersion_shift; 190 //! VdW shift repulsion constants 191 shift_consts_t repulsion_shift; 192 //! VdW switch constants 193 switch_consts_t vdw_switch; 194 195 /* Ewald Coulomb force table data - accessed through texture memory */ 196 //! table scale/spacing 197 float coulomb_tab_scale; 198 } cl_nbparam_params_t; 199 200 201 /** \internal 202 * \brief Typedef of actual timer type. 203 */ 204 typedef struct Nbnxm::gpu_timers_t cl_timers_t; 205 206 /*! \internal 207 * \brief Main data structure for OpenCL nonbonded force calculations. 208 */ 209 struct NbnxmGpu 210 { 211 /* \brief OpenCL device context 212 * 213 * \todo Make it constant reference, once NbnxmGpu is a proper class. 214 */ 215 const DeviceContext* deviceContext_; 216 //! OpenCL runtime data (context, kernels) 217 struct gmx_device_runtime_data_t* dev_rundata = nullptr; 218 219 /**< Pointers to non-bonded kernel functions 220 * organized similar with nb_kfunc_xxx arrays in nbnxn_ocl.cpp */ 221 ///@{ 222 cl_kernel kernel_noener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; 223 cl_kernel kernel_ener_noprune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; 224 cl_kernel kernel_noener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; 225 cl_kernel kernel_ener_prune_ptr[eelTypeNR][evdwTypeNR] = { { nullptr } }; 226 ///@} 227 //! prune kernels, ePruneKind defined the kernel kinds 228 cl_kernel kernel_pruneonly[ePruneNR] = { nullptr }; 229 230 //! true if prefetching fg i-atom LJ parameters should be used in the kernels 231 bool bPrefetchLjParam = false; 232 233 /**< auxiliary kernels implementing memset-like functions */ 234 ///@{ 235 cl_kernel kernel_memset_f = nullptr; 236 cl_kernel kernel_memset_f2 = nullptr; 237 cl_kernel kernel_memset_f3 = nullptr; 238 cl_kernel kernel_zero_e_fshift = nullptr; 239 ///@} 240 241 //! true if doing both local/non-local NB work on GPU 242 bool bUseTwoStreams = false; 243 //! true indicates that the nonlocal_done event was enqueued 244 bool bNonLocalStreamActive = false; 245 246 //! atom data 247 cl_atomdata_t* atdat = nullptr; 248 //! parameters required for the non-bonded calc. 249 NBParamGpu* nbparam = nullptr; 250 //! pair-list data structures (local and non-local) 251 gmx::EnumerationArray<Nbnxm::InteractionLocality, Nbnxm::gpu_plist*> plist = { nullptr }; 252 //! staging area where fshift/energies get downloaded 253 nb_staging_t nbst; 254 255 //! local and non-local GPU queues 256 gmx::EnumerationArray<Nbnxm::InteractionLocality, const DeviceStream*> deviceStreams; 257 258 /*! \brief Events used for synchronization */ 259 /*! \{ */ 260 /*! \brief Event triggered when the non-local non-bonded 261 * kernel is done (and the local transfer can proceed) */ 262 cl_event nonlocal_done = nullptr; 263 /*! \brief Event triggered when the tasks issued in the local 264 * stream that need to precede the non-local force or buffer 265 * operation calculations are done (e.g. f buffer 0-ing, local 266 * x/q H2D, buffer op initialization in local stream that is 267 * required also by nonlocal stream ) */ 268 cl_event misc_ops_and_local_H2D_done = nullptr; 269 /*! \} */ 270 271 //! True if there has been local/nonlocal GPU work, either bonded or nonbonded, scheduled 272 // to be executed in the current domain. As long as bonded work is not split up into 273 // local/nonlocal, if there is bonded GPU work, both flags will be true. 274 gmx::EnumerationArray<Nbnxm::InteractionLocality, bool> haveWork; 275 276 277 //! True if event-based timing is enabled. 278 bool bDoTime = false; 279 //! OpenCL event-based timers. 280 cl_timers_t* timers = nullptr; 281 //! Timing data. TODO: deprecate this and query timers for accumulated data instead 282 gmx_wallclock_gpu_nbnxn_t* timings = nullptr; 283 }; 284 285 #endif /* NBNXN_OPENCL_TYPES_H */ 286