1 /* ---------------------------------------------------------------------- 2 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator 3 4 Original Version: 5 http://lammps.sandia.gov, Sandia National Laboratories 6 Steve Plimpton, sjplimp@sandia.gov 7 8 See the README file in the top-level LAMMPS directory. 9 10 ----------------------------------------------------------------------- 11 12 USER-CUDA Package and associated modifications: 13 https://sourceforge.net/projects/lammpscuda/ 14 15 Christian Trott, christian.trott@tu-ilmenau.de 16 Lars Winterfeld, lars.winterfeld@tu-ilmenau.de 17 Theoretical Physics II, University of Technology Ilmenau, Germany 18 19 See the README file in the USER-CUDA directory. 20 21 This software is distributed under the GNU General Public License. 22 ------------------------------------------------------------------------- */ 23 24 #ifndef _CUDA_COMMON_H_ 25 #define _CUDA_COMMON_H_ 26 27 //#include "cutil.h" 28 #include "cuda_precision.h" 29 #include "cuda_wrapper_cu.h" 30 31 #define CUDA_MAX_TYPES_PLUS_ONE 12 //for pair styles which use constant space for parameters, this needs to be one larger than the number of atom types 32 //this can not be arbitrarly large, since constant space is limited. 33 //in principle one could alter potentials to use global memory for parameters, some du that already since the first examples I encountered had a high number (20+) of atom types 34 //Christian 35 #define CUDA_MAX_TYPES2 (CUDA_MAX_TYPES_PLUS_ONE * CUDA_MAX_TYPES_PLUS_ONE) 36 #define CUDA_MAX_NSPECIAL 25 37 38 // define some easy-to-use debug and emulation macros 39 #ifdef _DEBUG 40 #define MYDBG(a) a 41 #else 42 #define MYDBG(a) 43 #endif 44 45 #if __DEVICE_EMULATION__ 46 #define MYEMU(a) a 47 #else 48 #define MYEMU(a) 49 #endif 50 51 #define MYEMUDBG(a) MYEMU(MYDBG(a)) 52 53 // Add Prefix (needed as workaround, same constant's names in different files causes conflict) 54 #define MY_ADD_PREFIX(prefix, var) prefix##_##var 55 #define MY_ADD_PREFIX2(prefix, var) MY_ADD_PREFIX(prefix, var) 56 #define MY_AP(var) MY_ADD_PREFIX2(MY_PREFIX, var) 57 58 #define MY_VAR_TO_STR(var) #var 59 #define MY_VAR_TO_STR2(var) MY_VAR_TO_STR(var) 60 //#define &MY_AP(var) (MY_VAR_TO_STR2(MY_PREFIX) "_" MY_VAR_TO_STR2(var)) 61 //#define &MY_AP(var) &(MY_AP(var)) 62 #define CUDA_USE_TEXTURE 63 #define CUDA_USE_FLOAT4 64 65 //constants used by many classes 66 67 //domain 68 #define _boxhi MY_AP(boxhi) 69 #define _boxlo MY_AP(boxlo) 70 #define _subhi MY_AP(subhi) 71 #define _sublo MY_AP(sublo) 72 #define _box_size MY_AP(box_size) 73 #define _prd MY_AP(prd) 74 #define _periodicity MY_AP(periodicity) 75 #define _triclinic MY_AP(triclinic) 76 #define _boxhi_lamda MY_AP(boxhi_lamda) 77 #define _boxlo_lamda MY_AP(boxlo_lamda) 78 #define _prd_lamda MY_AP(prd_lamda) 79 #define _h MY_AP(h) 80 #define _h_inv MY_AP(h_inv) 81 #define _h_rate MY_AP(h_rate) 82 __device__ __constant__ X_FLOAT _boxhi[3]; 83 __device__ __constant__ X_FLOAT _boxlo[3]; 84 __device__ __constant__ X_FLOAT _subhi[3]; 85 __device__ __constant__ X_FLOAT _sublo[3]; 86 __device__ __constant__ X_FLOAT _box_size[3]; 87 __device__ __constant__ X_FLOAT _prd[3]; 88 __device__ __constant__ int _periodicity[3]; 89 __device__ __constant__ int _triclinic; 90 __device__ __constant__ X_FLOAT _boxhi_lamda[3]; 91 __device__ __constant__ X_FLOAT _boxlo_lamda[3]; 92 __device__ __constant__ X_FLOAT _prd_lamda[3]; 93 __device__ __constant__ X_FLOAT _h[6]; 94 __device__ __constant__ X_FLOAT _h_inv[6]; 95 __device__ __constant__ V_FLOAT _h_rate[6]; 96 97 98 //atom properties 99 #define _x MY_AP(x) 100 #define _v MY_AP(v) 101 #define _f MY_AP(f) 102 #define _tag MY_AP(tag) 103 #define _type MY_AP(type) 104 #define _mask MY_AP(mask) 105 #define _image MY_AP(image) 106 #define _q MY_AP(q) 107 #define _mass MY_AP(mass) 108 #define _rmass MY_AP(rmass) 109 #define _rmass_flag MY_AP(rmass_flag) 110 #define _eatom MY_AP(eatom) 111 #define _vatom MY_AP(vatom) 112 #define _x_type MY_AP(x_type) 113 #define _radius MY_AP(radius) 114 #define _density MY_AP(density) 115 #define _omega MY_AP(omega) 116 #define _torque MY_AP(torque) 117 #define _special MY_AP(special) 118 #define _maxspecial MY_AP(maxspecial) 119 #define _nspecial MY_AP(nspecial) 120 #define _special_flag MY_AP(special_flag) 121 #define _molecule MY_AP(molecule) 122 #define _v_radius MY_AP(v_radius) 123 #define _omega_rmass MY_AP(omega_rmass) 124 #define _freeze_group_bit MY_AP(freeze_group_bit) 125 #define _map_array MY_AP(map_array) 126 __device__ __constant__ X_FLOAT* _x; //holds pointer to positions 127 __device__ __constant__ V_FLOAT* _v; 128 __device__ __constant__ F_FLOAT* _f; 129 __device__ __constant__ int* _tag; 130 __device__ __constant__ int* _type; 131 __device__ __constant__ int* _mask; 132 __device__ __constant__ int* _image; 133 __device__ __constant__ V_FLOAT* _mass; 134 __device__ __constant__ F_FLOAT* _q; 135 __device__ __constant__ V_FLOAT* _rmass; 136 __device__ __constant__ int _rmass_flag; 137 __device__ __constant__ ENERGY_FLOAT* _eatom; 138 __device__ __constant__ ENERGY_FLOAT* _vatom; 139 __device__ __constant__ X_FLOAT4* _x_type; //holds pointer to positions 140 __device__ __constant__ X_FLOAT* _radius; 141 __device__ __constant__ F_FLOAT* _density; 142 __device__ __constant__ V_FLOAT* _omega; 143 __device__ __constant__ F_FLOAT* _torque; 144 __device__ __constant__ int* _special; 145 __device__ __constant__ int _maxspecial; 146 __device__ __constant__ int* _nspecial; 147 __device__ __constant__ int _special_flag[4]; 148 __device__ __constant__ int* _molecule; 149 __device__ __constant__ V_FLOAT4* _v_radius; //holds pointer to positions 150 __device__ __constant__ V_FLOAT4* _omega_rmass; //holds pointer to positions 151 __device__ __constant__ int _freeze_group_bit; 152 __device__ __constant__ int* _map_array; 153 154 #ifdef CUDA_USE_TEXTURE 155 156 #define _x_tex MY_AP(x_tex) 157 #if X_PRECISION == 1 158 texture<float> _x_tex; 159 #else 160 texture<int2, 1> _x_tex; 161 #endif 162 163 #define _type_tex MY_AP(type_tex) 164 texture<int> _type_tex; 165 166 #define _x_type_tex MY_AP(x_type_tex) 167 #if X_PRECISION == 1 168 texture<float4, 1> _x_type_tex; 169 #else 170 texture<int4, 1> _x_type_tex; 171 #endif 172 173 #define _v_radius_tex MY_AP(v_radius_tex) 174 #if V_PRECISION == 1 175 texture<float4, 1> _v_radius_tex; 176 #else 177 texture<int4, 1> _v_radius_tex; 178 #endif 179 180 #define _omega_rmass_tex MY_AP(omega_rmass_tex) 181 #if V_PRECISION == 1 182 texture<float4, 1> _omega_rmass_tex; 183 #else 184 texture<int4, 1> _omega_rmass_tex; 185 #endif 186 187 #define _q_tex MY_AP(q_tex) 188 #if F_PRECISION == 1 189 texture<float> _q_tex; 190 #else 191 texture<int2, 1> _q_tex; 192 #endif 193 194 #endif 195 196 //neighbor 197 #ifdef IncludeCommonNeigh 198 #define _inum MY_AP(inum) 199 #define _inum_border MY_AP(inum_border) 200 #define _ilist MY_AP(ilist) 201 #define _ilist_border MY_AP(ilist_border) 202 #define _numneigh MY_AP(numneigh) 203 #define _numneigh_border MY_AP(numneigh_border) 204 #define _numneigh_inner MY_AP(numneigh_inner) 205 #define _firstneigh MY_AP(firstneigh) 206 #define _neighbors MY_AP(neighbors) 207 #define _neighbors_border MY_AP(neighbors_border) 208 #define _neighbors_inner MY_AP(neighbors_inner) 209 #define _reneigh_flag MY_AP(reneigh_flag) 210 #define _triggerneighsq MY_AP(triggerneighsq) 211 #define _xhold MY_AP(xhold) 212 #define _maxhold MY_AP(maxhold) 213 #define _dist_check MY_AP(dist_check) 214 #define _neighbor_maxlocal MY_AP(neighbor_maxlocal) 215 #define _maxneighbors MY_AP(maxneighbors) 216 #define _overlap_comm MY_AP(overlap_comm) 217 __device__ __constant__ int _inum; 218 __device__ __constant__ int* _inum_border; 219 __device__ __constant__ int* _ilist; 220 __device__ __constant__ int* _ilist_border; 221 __device__ __constant__ int* _numneigh; 222 __device__ __constant__ int* _numneigh_border; 223 __device__ __constant__ int* _numneigh_inner; 224 __device__ __constant__ int** _firstneigh; 225 __device__ __constant__ int* _neighbors; 226 __device__ __constant__ int* _neighbors_border; 227 __device__ __constant__ int* _neighbors_inner; 228 __device__ __constant__ int* _reneigh_flag; 229 __device__ __constant__ X_FLOAT _triggerneighsq; 230 __device__ __constant__ X_FLOAT* _xhold; //holds pointer to positions 231 __device__ __constant__ int _maxhold; 232 __device__ __constant__ int _dist_check; 233 __device__ __constant__ int _neighbor_maxlocal; 234 __device__ __constant__ int _maxneighbors; 235 __device__ __constant__ int _overlap_comm; 236 #endif 237 238 //system properties 239 #define _nall MY_AP(nall) 240 #define _nghost MY_AP(nghost) 241 #define _nlocal MY_AP(nlocal) 242 #define _nmax MY_AP(nmax) 243 #define _cuda_ntypes MY_AP(cuda_ntypes) 244 #define _dtf MY_AP(dtf) 245 #define _dtv MY_AP(dtv) 246 #define _factor MY_AP(factor) 247 #define _virial MY_AP(virial) 248 #define _eng_vdwl MY_AP(eng_vdwl) 249 #define _eng_coul MY_AP(eng_coul) 250 #define _molecular MY_AP(molecular) 251 __device__ __constant__ unsigned _nall; 252 __device__ __constant__ unsigned _nghost; 253 __device__ __constant__ unsigned _nlocal; 254 __device__ __constant__ unsigned _nmax; 255 __device__ __constant__ unsigned _cuda_ntypes; 256 __device__ __constant__ V_FLOAT _dtf; 257 __device__ __constant__ X_FLOAT _dtv; 258 __device__ __constant__ V_FLOAT _factor; 259 __device__ __constant__ ENERGY_FLOAT* _virial; 260 __device__ __constant__ ENERGY_FLOAT* _eng_vdwl; 261 __device__ __constant__ ENERGY_FLOAT* _eng_coul; 262 __device__ __constant__ int _molecular; 263 264 //other general constants 265 #define _buffer MY_AP(buffer) 266 #define _flag MY_AP(flag) 267 #define _debugdata MY_AP(debugdata) 268 __device__ __constant__ void* _buffer; 269 __device__ __constant__ int* _flag; 270 __device__ __constant__ int* _debugdata; 271 272 // pointers to data fields on GPU are hold in constant space 273 // -> reduces register usage and number of parameters for kernelcalls 274 // will be variables of file scope in cuda files 275 276 277 278 279 // maybe used to output cudaError_t 280 #define MY_OUTPUT_RESULT(result) \ 281 switch(result) \ 282 { \ 283 case cudaSuccess: printf(" => cudaSuccess\n"); break; \ 284 case cudaErrorInvalidValue: printf(" => cudaErrorInvalidValue\n"); break; \ 285 case cudaErrorInvalidSymbol: printf(" => cudaErrorInvalidSymbol\n"); break; \ 286 case cudaErrorInvalidDevicePointer: printf(" => cudaErrorInvalidDevicePointer\n"); break; \ 287 case cudaErrorInvalidMemcpyDirection: printf(" => cudaErrorInvalidMemcpyDirection\n"); break; \ 288 default: printf(" => unknown\n"); break; \ 289 } 290 291 #ifdef _DEBUG 292 # define CUT_CHECK_ERROR(errorMessage) { \ 293 cudaError_t err = cudaGetLastError(); \ 294 if( cudaSuccess != err) { \ 295 fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ 296 errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ 297 exit(EXIT_FAILURE); \ 298 } \ 299 err = cudaThreadSynchronize(); \ 300 if( cudaSuccess != err) { \ 301 fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ 302 errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ 303 exit(EXIT_FAILURE); \ 304 } \ 305 } 306 #else 307 # define CUT_CHECK_ERROR(errorMessage) { \ 308 cudaError_t err = cudaGetLastError(); \ 309 if( cudaSuccess != err) { \ 310 fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ 311 errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ 312 exit(EXIT_FAILURE); \ 313 } \ 314 } 315 #endif 316 317 # define CUDA_SAFE_CALL_NO_SYNC( call) { \ 318 cudaError err = call; \ 319 if( cudaSuccess != err) { \ 320 fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ 321 __FILE__, __LINE__, cudaGetErrorString( err) ); \ 322 exit(EXIT_FAILURE); \ 323 } } 324 325 # define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call); 326 327 #define X_MASK 1 328 #define V_MASK 2 329 #define F_MASK 4 330 #define TAG_MASK 8 331 #define TYPE_MASK 16 332 #define MASK_MASK 32 333 #define IMAGE_MASK 64 334 #define Q_MASK 128 335 #define MOLECULE_MASK 256 336 #define RMASS_MASK 512 337 #define RADIUS_MASK 1024 338 #define DENSITY_MASK 2048 339 #define OMEGA_MASK 4096 340 #define TORQUE_MASK 8192 341 342 343 344 #endif // #ifdef _CUDA_COMMON_H_ 345