/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator Original Version: http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov See the README file in the top-level LAMMPS directory. ----------------------------------------------------------------------- USER-CUDA Package and associated modifications: https://sourceforge.net/projects/lammpscuda/ Christian Trott, christian.trott@tu-ilmenau.de Lars Winterfeld, lars.winterfeld@tu-ilmenau.de Theoretical Physics II, University of Technology Ilmenau, Germany See the README file in the USER-CUDA directory. This software is distributed under the GNU General Public License. ------------------------------------------------------------------------- */ #ifndef _CUDA_COMMON_H_ #define _CUDA_COMMON_H_ //#include "cutil.h" #include "cuda_precision.h" #include "cuda_wrapper_cu.h" #define CUDA_MAX_TYPES_PLUS_ONE 12 //for pair styles which use constant space for parameters, this needs to be one larger than the number of atom types //this can not be arbitrarly large, since constant space is limited. //in principle one could alter potentials to use global memory for parameters, some du that already since the first examples I encountered had a high number (20+) of atom types //Christian #define CUDA_MAX_TYPES2 (CUDA_MAX_TYPES_PLUS_ONE * CUDA_MAX_TYPES_PLUS_ONE) #define CUDA_MAX_NSPECIAL 25 // define some easy-to-use debug and emulation macros #ifdef _DEBUG #define MYDBG(a) a #else #define MYDBG(a) #endif #if __DEVICE_EMULATION__ #define MYEMU(a) a #else #define MYEMU(a) #endif #define MYEMUDBG(a) MYEMU(MYDBG(a)) // Add Prefix (needed as workaround, same constant's names in different files causes conflict) #define MY_ADD_PREFIX(prefix, var) prefix##_##var #define MY_ADD_PREFIX2(prefix, var) MY_ADD_PREFIX(prefix, var) #define MY_AP(var) MY_ADD_PREFIX2(MY_PREFIX, var) #define MY_VAR_TO_STR(var) #var #define MY_VAR_TO_STR2(var) MY_VAR_TO_STR(var) //#define &MY_AP(var) (MY_VAR_TO_STR2(MY_PREFIX) "_" MY_VAR_TO_STR2(var)) //#define &MY_AP(var) &(MY_AP(var)) #define CUDA_USE_TEXTURE #define CUDA_USE_FLOAT4 //constants used by many classes //domain #define _boxhi MY_AP(boxhi) #define _boxlo MY_AP(boxlo) #define _subhi MY_AP(subhi) #define _sublo MY_AP(sublo) #define _box_size MY_AP(box_size) #define _prd MY_AP(prd) #define _periodicity MY_AP(periodicity) #define _triclinic MY_AP(triclinic) #define _boxhi_lamda MY_AP(boxhi_lamda) #define _boxlo_lamda MY_AP(boxlo_lamda) #define _prd_lamda MY_AP(prd_lamda) #define _h MY_AP(h) #define _h_inv MY_AP(h_inv) #define _h_rate MY_AP(h_rate) __device__ __constant__ X_FLOAT _boxhi[3]; __device__ __constant__ X_FLOAT _boxlo[3]; __device__ __constant__ X_FLOAT _subhi[3]; __device__ __constant__ X_FLOAT _sublo[3]; __device__ __constant__ X_FLOAT _box_size[3]; __device__ __constant__ X_FLOAT _prd[3]; __device__ __constant__ int _periodicity[3]; __device__ __constant__ int _triclinic; __device__ __constant__ X_FLOAT _boxhi_lamda[3]; __device__ __constant__ X_FLOAT _boxlo_lamda[3]; __device__ __constant__ X_FLOAT _prd_lamda[3]; __device__ __constant__ X_FLOAT _h[6]; __device__ __constant__ X_FLOAT _h_inv[6]; __device__ __constant__ V_FLOAT _h_rate[6]; //atom properties #define _x MY_AP(x) #define _v MY_AP(v) #define _f MY_AP(f) #define _tag MY_AP(tag) #define _type MY_AP(type) #define _mask MY_AP(mask) #define _image MY_AP(image) #define _q MY_AP(q) #define _mass MY_AP(mass) #define _rmass MY_AP(rmass) #define _rmass_flag MY_AP(rmass_flag) #define _eatom MY_AP(eatom) #define _vatom MY_AP(vatom) #define _x_type MY_AP(x_type) #define _radius MY_AP(radius) #define _density MY_AP(density) #define _omega MY_AP(omega) #define _torque MY_AP(torque) #define _special MY_AP(special) #define _maxspecial MY_AP(maxspecial) #define _nspecial MY_AP(nspecial) #define _special_flag MY_AP(special_flag) #define _molecule MY_AP(molecule) #define _v_radius MY_AP(v_radius) #define _omega_rmass MY_AP(omega_rmass) #define _freeze_group_bit MY_AP(freeze_group_bit) #define _map_array MY_AP(map_array) __device__ __constant__ X_FLOAT* _x; //holds pointer to positions __device__ __constant__ V_FLOAT* _v; __device__ __constant__ F_FLOAT* _f; __device__ __constant__ int* _tag; __device__ __constant__ int* _type; __device__ __constant__ int* _mask; __device__ __constant__ int* _image; __device__ __constant__ V_FLOAT* _mass; __device__ __constant__ F_FLOAT* _q; __device__ __constant__ V_FLOAT* _rmass; __device__ __constant__ int _rmass_flag; __device__ __constant__ ENERGY_FLOAT* _eatom; __device__ __constant__ ENERGY_FLOAT* _vatom; __device__ __constant__ X_FLOAT4* _x_type; //holds pointer to positions __device__ __constant__ X_FLOAT* _radius; __device__ __constant__ F_FLOAT* _density; __device__ __constant__ V_FLOAT* _omega; __device__ __constant__ F_FLOAT* _torque; __device__ __constant__ int* _special; __device__ __constant__ int _maxspecial; __device__ __constant__ int* _nspecial; __device__ __constant__ int _special_flag[4]; __device__ __constant__ int* _molecule; __device__ __constant__ V_FLOAT4* _v_radius; //holds pointer to positions __device__ __constant__ V_FLOAT4* _omega_rmass; //holds pointer to positions __device__ __constant__ int _freeze_group_bit; __device__ __constant__ int* _map_array; #ifdef CUDA_USE_TEXTURE #define _x_tex MY_AP(x_tex) #if X_PRECISION == 1 texture _x_tex; #else texture _x_tex; #endif #define _type_tex MY_AP(type_tex) texture _type_tex; #define _x_type_tex MY_AP(x_type_tex) #if X_PRECISION == 1 texture _x_type_tex; #else texture _x_type_tex; #endif #define _v_radius_tex MY_AP(v_radius_tex) #if V_PRECISION == 1 texture _v_radius_tex; #else texture _v_radius_tex; #endif #define _omega_rmass_tex MY_AP(omega_rmass_tex) #if V_PRECISION == 1 texture _omega_rmass_tex; #else texture _omega_rmass_tex; #endif #define _q_tex MY_AP(q_tex) #if F_PRECISION == 1 texture _q_tex; #else texture _q_tex; #endif #endif //neighbor #ifdef IncludeCommonNeigh #define _inum MY_AP(inum) #define _inum_border MY_AP(inum_border) #define _ilist MY_AP(ilist) #define _ilist_border MY_AP(ilist_border) #define _numneigh MY_AP(numneigh) #define _numneigh_border MY_AP(numneigh_border) #define _numneigh_inner MY_AP(numneigh_inner) #define _firstneigh MY_AP(firstneigh) #define _neighbors MY_AP(neighbors) #define _neighbors_border MY_AP(neighbors_border) #define _neighbors_inner MY_AP(neighbors_inner) #define _reneigh_flag MY_AP(reneigh_flag) #define _triggerneighsq MY_AP(triggerneighsq) #define _xhold MY_AP(xhold) #define _maxhold MY_AP(maxhold) #define _dist_check MY_AP(dist_check) #define _neighbor_maxlocal MY_AP(neighbor_maxlocal) #define _maxneighbors MY_AP(maxneighbors) #define _overlap_comm MY_AP(overlap_comm) __device__ __constant__ int _inum; __device__ __constant__ int* _inum_border; __device__ __constant__ int* _ilist; __device__ __constant__ int* _ilist_border; __device__ __constant__ int* _numneigh; __device__ __constant__ int* _numneigh_border; __device__ __constant__ int* _numneigh_inner; __device__ __constant__ int** _firstneigh; __device__ __constant__ int* _neighbors; __device__ __constant__ int* _neighbors_border; __device__ __constant__ int* _neighbors_inner; __device__ __constant__ int* _reneigh_flag; __device__ __constant__ X_FLOAT _triggerneighsq; __device__ __constant__ X_FLOAT* _xhold; //holds pointer to positions __device__ __constant__ int _maxhold; __device__ __constant__ int _dist_check; __device__ __constant__ int _neighbor_maxlocal; __device__ __constant__ int _maxneighbors; __device__ __constant__ int _overlap_comm; #endif //system properties #define _nall MY_AP(nall) #define _nghost MY_AP(nghost) #define _nlocal MY_AP(nlocal) #define _nmax MY_AP(nmax) #define _cuda_ntypes MY_AP(cuda_ntypes) #define _dtf MY_AP(dtf) #define _dtv MY_AP(dtv) #define _factor MY_AP(factor) #define _virial MY_AP(virial) #define _eng_vdwl MY_AP(eng_vdwl) #define _eng_coul MY_AP(eng_coul) #define _molecular MY_AP(molecular) __device__ __constant__ unsigned _nall; __device__ __constant__ unsigned _nghost; __device__ __constant__ unsigned _nlocal; __device__ __constant__ unsigned _nmax; __device__ __constant__ unsigned _cuda_ntypes; __device__ __constant__ V_FLOAT _dtf; __device__ __constant__ X_FLOAT _dtv; __device__ __constant__ V_FLOAT _factor; __device__ __constant__ ENERGY_FLOAT* _virial; __device__ __constant__ ENERGY_FLOAT* _eng_vdwl; __device__ __constant__ ENERGY_FLOAT* _eng_coul; __device__ __constant__ int _molecular; //other general constants #define _buffer MY_AP(buffer) #define _flag MY_AP(flag) #define _debugdata MY_AP(debugdata) __device__ __constant__ void* _buffer; __device__ __constant__ int* _flag; __device__ __constant__ int* _debugdata; // pointers to data fields on GPU are hold in constant space // -> reduces register usage and number of parameters for kernelcalls // will be variables of file scope in cuda files // maybe used to output cudaError_t #define MY_OUTPUT_RESULT(result) \ switch(result) \ { \ case cudaSuccess: printf(" => cudaSuccess\n"); break; \ case cudaErrorInvalidValue: printf(" => cudaErrorInvalidValue\n"); break; \ case cudaErrorInvalidSymbol: printf(" => cudaErrorInvalidSymbol\n"); break; \ case cudaErrorInvalidDevicePointer: printf(" => cudaErrorInvalidDevicePointer\n"); break; \ case cudaErrorInvalidMemcpyDirection: printf(" => cudaErrorInvalidMemcpyDirection\n"); break; \ default: printf(" => unknown\n"); break; \ } #ifdef _DEBUG # define CUT_CHECK_ERROR(errorMessage) { \ cudaError_t err = cudaGetLastError(); \ if( cudaSuccess != err) { \ fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ exit(EXIT_FAILURE); \ } \ err = cudaThreadSynchronize(); \ if( cudaSuccess != err) { \ fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ exit(EXIT_FAILURE); \ } \ } #else # define CUT_CHECK_ERROR(errorMessage) { \ cudaError_t err = cudaGetLastError(); \ if( cudaSuccess != err) { \ fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\ exit(EXIT_FAILURE); \ } \ } #endif # define CUDA_SAFE_CALL_NO_SYNC( call) { \ cudaError err = call; \ if( cudaSuccess != err) { \ fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \ __FILE__, __LINE__, cudaGetErrorString( err) ); \ exit(EXIT_FAILURE); \ } } # define CUDA_SAFE_CALL( call) CUDA_SAFE_CALL_NO_SYNC(call); #define X_MASK 1 #define V_MASK 2 #define F_MASK 4 #define TAG_MASK 8 #define TYPE_MASK 16 #define MASK_MASK 32 #define IMAGE_MASK 64 #define Q_MASK 128 #define MOLECULE_MASK 256 #define RMASS_MASK 512 #define RADIUS_MASK 1024 #define DENSITY_MASK 2048 #define OMEGA_MASK 4096 #define TORQUE_MASK 8192 #endif // #ifdef _CUDA_COMMON_H_