1 /* ----------------------------------------------------------------------
2    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
3 
4    Original Version:
5    http://lammps.sandia.gov, Sandia National Laboratories
6    Steve Plimpton, sjplimp@sandia.gov
7 
8    See the README file in the top-level LAMMPS directory.
9 
10    -----------------------------------------------------------------------
11 
12    USER-CUDA Package and associated modifications:
13    https://sourceforge.net/projects/lammpscuda/
14 
15    Christian Trott, christian.trott@tu-ilmenau.de
16    Lars Winterfeld, lars.winterfeld@tu-ilmenau.de
17    Theoretical Physics II, University of Technology Ilmenau, Germany
18 
19    See the README file in the USER-CUDA directory.
20 
21    This software is distributed under the GNU General Public License.
22 ------------------------------------------------------------------------- */
23 
24 #ifndef _CUDA_COMMON_H_
25 #define _CUDA_COMMON_H_
26 
27 //#include "cutil.h"
28 #include "cuda_precision.h"
29 #include "cuda_wrapper_cu.h"
30 
31 #define CUDA_MAX_TYPES_PLUS_ONE 12 //for pair styles which use constant space for parameters, this needs to be one larger than the number of atom types
32 //this can not be arbitrarly large, since constant space is limited.
33 //in principle one could alter potentials to use global memory for parameters, some du that already since the first examples I encountered had a high number (20+) of atom types
34 //Christian
35 #define CUDA_MAX_TYPES2 (CUDA_MAX_TYPES_PLUS_ONE * CUDA_MAX_TYPES_PLUS_ONE)
36 #define CUDA_MAX_NSPECIAL 25
37 
38 // define some easy-to-use debug and emulation macros
39 #ifdef _DEBUG
40 #define MYDBG(a) a
41 #else
42 #define MYDBG(a)
43 #endif
44 
45 #if __DEVICE_EMULATION__
46 #define MYEMU(a) a
47 #else
48 #define MYEMU(a)
49 #endif
50 
51 #define MYEMUDBG(a) MYEMU(MYDBG(a))
52 
53 // Add Prefix (needed as workaround, same constant's names in different files causes conflict)
54 #define MY_ADD_PREFIX(prefix, var) prefix##_##var
55 #define MY_ADD_PREFIX2(prefix, var) MY_ADD_PREFIX(prefix, var)
56 #define MY_AP(var) MY_ADD_PREFIX2(MY_PREFIX, var)
57 
58 #define MY_VAR_TO_STR(var) #var
59 #define MY_VAR_TO_STR2(var) MY_VAR_TO_STR(var)
60 //#define &MY_AP(var) (MY_VAR_TO_STR2(MY_PREFIX) "_" MY_VAR_TO_STR2(var))
61 //#define &MY_AP(var) &(MY_AP(var))
62 #define CUDA_USE_TEXTURE
63 #define CUDA_USE_FLOAT4
64 
65 //constants used by many classes
66 
67 //domain
68 #define _boxhi       MY_AP(boxhi)
69 #define _boxlo       MY_AP(boxlo)
70 #define _subhi       MY_AP(subhi)
71 #define _sublo       MY_AP(sublo)
72 #define _box_size    MY_AP(box_size)
73 #define _prd         MY_AP(prd)
74 #define _periodicity MY_AP(periodicity)
75 #define _triclinic	 MY_AP(triclinic)
76 #define _boxhi_lamda MY_AP(boxhi_lamda)
77 #define _boxlo_lamda MY_AP(boxlo_lamda)
78 #define _prd_lamda   MY_AP(prd_lamda)
79 #define _h		 	 MY_AP(h)
80 #define _h_inv	 	 MY_AP(h_inv)
81 #define _h_rate		 MY_AP(h_rate)
82 __device__ __constant__ X_FLOAT _boxhi[3];
83 __device__ __constant__ X_FLOAT _boxlo[3];
84 __device__ __constant__ X_FLOAT _subhi[3];
85 __device__ __constant__ X_FLOAT _sublo[3];
86 __device__ __constant__ X_FLOAT _box_size[3];
87 __device__ __constant__ X_FLOAT _prd[3];
88 __device__ __constant__ int _periodicity[3];
89 __device__ __constant__ int _triclinic;
90 __device__ __constant__ X_FLOAT _boxhi_lamda[3];
91 __device__ __constant__ X_FLOAT _boxlo_lamda[3];
92 __device__ __constant__ X_FLOAT _prd_lamda[3];
93 __device__ __constant__ X_FLOAT _h[6];
94 __device__ __constant__ X_FLOAT _h_inv[6];
95 __device__ __constant__ V_FLOAT _h_rate[6];
96 
97 
98 //atom properties
99 #define _x           MY_AP(x)
100 #define _v           MY_AP(v)
101 #define _f           MY_AP(f)
102 #define _tag         MY_AP(tag)
103 #define _type        MY_AP(type)
104 #define _mask        MY_AP(mask)
105 #define _image       MY_AP(image)
106 #define _q           MY_AP(q)
107 #define _mass        MY_AP(mass)
108 #define _rmass       MY_AP(rmass)
109 #define _rmass_flag  MY_AP(rmass_flag)
110 #define _eatom       MY_AP(eatom)
111 #define _vatom       MY_AP(vatom)
112 #define _x_type      MY_AP(x_type)
113 #define _radius      MY_AP(radius)
114 #define _density     MY_AP(density)
115 #define _omega       MY_AP(omega)
116 #define _torque      MY_AP(torque)
117 #define _special     MY_AP(special)
118 #define _maxspecial  MY_AP(maxspecial)
119 #define _nspecial    MY_AP(nspecial)
120 #define _special_flag  MY_AP(special_flag)
121 #define _molecule    MY_AP(molecule)
122 #define _v_radius    MY_AP(v_radius)
123 #define _omega_rmass MY_AP(omega_rmass)
124 #define _freeze_group_bit MY_AP(freeze_group_bit)
125 #define _map_array   MY_AP(map_array)
126 __device__ __constant__ X_FLOAT* _x;  //holds pointer to positions
127 __device__ __constant__ V_FLOAT* _v;
128 __device__ __constant__ F_FLOAT* _f;
129 __device__ __constant__ int* _tag;
130 __device__ __constant__ int* _type;
131 __device__ __constant__ int* _mask;
132 __device__ __constant__ int* _image;
133 __device__ __constant__ V_FLOAT* _mass;
134 __device__ __constant__ F_FLOAT* _q;
135 __device__ __constant__ V_FLOAT* _rmass;
136 __device__ __constant__ int _rmass_flag;
137 __device__ __constant__ ENERGY_FLOAT* _eatom;
138 __device__ __constant__ ENERGY_FLOAT* _vatom;
139 __device__ __constant__ X_FLOAT4* _x_type;  //holds pointer to positions
140 __device__ __constant__ X_FLOAT* _radius;
141 __device__ __constant__ F_FLOAT* _density;
142 __device__ __constant__ V_FLOAT* _omega;
143 __device__ __constant__ F_FLOAT* _torque;
144 __device__ __constant__ int* _special;
145 __device__ __constant__ int _maxspecial;
146 __device__ __constant__ int* _nspecial;
147 __device__ __constant__ int _special_flag[4];
148 __device__ __constant__ int* _molecule;
149 __device__ __constant__ V_FLOAT4* _v_radius;  //holds pointer to positions
150 __device__ __constant__ V_FLOAT4* _omega_rmass;  //holds pointer to positions
151 __device__ __constant__ int _freeze_group_bit;
152 __device__ __constant__ int* _map_array;
153 
154 #ifdef CUDA_USE_TEXTURE
155 
156 #define _x_tex         MY_AP(x_tex)
157 #if X_PRECISION == 1
158 texture<float> _x_tex;
159 #else
160 texture<int2, 1> _x_tex;
161 #endif
162 
163 #define _type_tex         MY_AP(type_tex)
164 texture<int> _type_tex;
165 
166 #define _x_type_tex         MY_AP(x_type_tex)
167 #if X_PRECISION == 1
168 texture<float4, 1> _x_type_tex;
169 #else
170 texture<int4, 1> _x_type_tex;
171 #endif
172 
173 #define _v_radius_tex         MY_AP(v_radius_tex)
174 #if V_PRECISION == 1
175 texture<float4, 1> _v_radius_tex;
176 #else
177 texture<int4, 1> _v_radius_tex;
178 #endif
179 
180 #define _omega_rmass_tex         MY_AP(omega_rmass_tex)
181 #if V_PRECISION == 1
182 texture<float4, 1> _omega_rmass_tex;
183 #else
184 texture<int4, 1> _omega_rmass_tex;
185 #endif
186 
187 #define _q_tex         MY_AP(q_tex)
188 #if F_PRECISION == 1
189 texture<float> _q_tex;
190 #else
191 texture<int2, 1> _q_tex;
192 #endif
193 
194 #endif
195 
196 //neighbor
197 #ifdef IncludeCommonNeigh
198 #define _inum        	MY_AP(inum)
199 #define _inum_border    MY_AP(inum_border)
200 #define _ilist       	MY_AP(ilist)
201 #define _ilist_border 	MY_AP(ilist_border)
202 #define _numneigh    	MY_AP(numneigh)
203 #define _numneigh_border 	MY_AP(numneigh_border)
204 #define _numneigh_inner		MY_AP(numneigh_inner)
205 #define _firstneigh  	MY_AP(firstneigh)
206 #define _neighbors 	MY_AP(neighbors)
207 #define _neighbors_border 	MY_AP(neighbors_border)
208 #define _neighbors_inner  	MY_AP(neighbors_inner)
209 #define _reneigh_flag 	MY_AP(reneigh_flag)
210 #define _triggerneighsq MY_AP(triggerneighsq)
211 #define _xhold       	MY_AP(xhold)
212 #define _maxhold     	MY_AP(maxhold)
213 #define _dist_check     MY_AP(dist_check)
214 #define _neighbor_maxlocal MY_AP(neighbor_maxlocal)
215 #define _maxneighbors   MY_AP(maxneighbors)
216 #define _overlap_comm   MY_AP(overlap_comm)
217 __device__ __constant__ int _inum;
218 __device__ __constant__ int* _inum_border;
219 __device__ __constant__ int* _ilist;
220 __device__ __constant__ int* _ilist_border;
221 __device__ __constant__ int* _numneigh;
222 __device__ __constant__ int* _numneigh_border;
223 __device__ __constant__ int* _numneigh_inner;
224 __device__ __constant__ int** _firstneigh;
225 __device__ __constant__ int* _neighbors;
226 __device__ __constant__ int* _neighbors_border;
227 __device__ __constant__ int* _neighbors_inner;
228 __device__ __constant__ int* _reneigh_flag;
229 __device__ __constant__ X_FLOAT _triggerneighsq;
230 __device__ __constant__ X_FLOAT* _xhold;  //holds pointer to positions
231 __device__ __constant__ int _maxhold;
232 __device__ __constant__ int _dist_check;
233 __device__ __constant__ int _neighbor_maxlocal;
234 __device__ __constant__ int _maxneighbors;
235 __device__ __constant__ int _overlap_comm;
236 #endif
237 
238 //system properties
239 #define _nall        MY_AP(nall)
240 #define _nghost      MY_AP(nghost)
241 #define _nlocal      MY_AP(nlocal)
242 #define _nmax        MY_AP(nmax)
243 #define _cuda_ntypes MY_AP(cuda_ntypes)
244 #define _dtf         MY_AP(dtf)
245 #define _dtv         MY_AP(dtv)
246 #define _factor      MY_AP(factor)
247 #define _virial      MY_AP(virial)
248 #define _eng_vdwl    MY_AP(eng_vdwl)
249 #define _eng_coul    MY_AP(eng_coul)
250 #define _molecular   MY_AP(molecular)
251 __device__ __constant__ unsigned _nall;
252 __device__ __constant__ unsigned _nghost;
253 __device__ __constant__ unsigned _nlocal;
254 __device__ __constant__ unsigned _nmax;
255 __device__ __constant__ unsigned _cuda_ntypes;
256 __device__ __constant__ V_FLOAT _dtf;
257 __device__ __constant__ X_FLOAT _dtv;
258 __device__ __constant__ V_FLOAT _factor;
259 __device__ __constant__ ENERGY_FLOAT* _virial;
260 __device__ __constant__ ENERGY_FLOAT* _eng_vdwl;
261 __device__ __constant__ ENERGY_FLOAT* _eng_coul;
262 __device__ __constant__ int _molecular;
263 
264 //other general constants
265 #define _buffer      MY_AP(buffer)
266 #define _flag		 MY_AP(flag)
267 #define _debugdata   MY_AP(debugdata)
268 __device__ __constant__ void* _buffer;
269 __device__ __constant__ int* _flag;
270 __device__ __constant__ int* _debugdata;
271 
272 // pointers to data fields on GPU are hold in constant space
273 // -> reduces register usage and number of parameters for kernelcalls
274 // will be variables of file scope in cuda files
275 
276 
277 
278 
279 // maybe used to output cudaError_t
280 #define MY_OUTPUT_RESULT(result) \
281   switch(result) \
282   { \
283   case cudaSuccess: printf(" => cudaSuccess\n"); break; \
284   case cudaErrorInvalidValue: printf(" => cudaErrorInvalidValue\n"); break; \
285   case cudaErrorInvalidSymbol: printf(" => cudaErrorInvalidSymbol\n"); break; \
286   case cudaErrorInvalidDevicePointer: printf(" => cudaErrorInvalidDevicePointer\n"); break; \
287   case cudaErrorInvalidMemcpyDirection: printf(" => cudaErrorInvalidMemcpyDirection\n"); break; \
288   default: printf(" => unknown\n"); break; \
289   }
290 
291 #ifdef _DEBUG
292 #  define CUT_CHECK_ERROR(errorMessage) {                                    \
293     cudaError_t err = cudaGetLastError();                                    \
294     if( cudaSuccess != err) {                                                \
295       fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n",    \
296               errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
297       exit(EXIT_FAILURE);                                                  \
298     }                                                                        \
299     err = cudaThreadSynchronize();                                           \
300     if( cudaSuccess != err) {                                                \
301       fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n",    \
302               errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
303       exit(EXIT_FAILURE);                                                  \
304     }                                                                        \
305   }
306 #else
307 #  define CUT_CHECK_ERROR(errorMessage) {                                    \
308     cudaError_t err = cudaGetLastError();                                    \
309     if( cudaSuccess != err) {                                                \
310       fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n",    \
311               errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
312       exit(EXIT_FAILURE);                                                  \
313     }                                                                        \
314   }
315 #endif
316 
317 #  define CUDA_SAFE_CALL_NO_SYNC( call) {                                    \
318     cudaError err = call;                                                    \
319     if( cudaSuccess != err) {                                                \
320       fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",        \
321               __FILE__, __LINE__, cudaGetErrorString( err) );              \
322       exit(EXIT_FAILURE);                                                  \
323     } }
324 
325 #  define CUDA_SAFE_CALL( call)     CUDA_SAFE_CALL_NO_SYNC(call);
326 
327 #define X_MASK 1
328 #define V_MASK 2
329 #define F_MASK 4
330 #define TAG_MASK 8
331 #define TYPE_MASK 16
332 #define MASK_MASK 32
333 #define IMAGE_MASK 64
334 #define Q_MASK 128
335 #define MOLECULE_MASK 256
336 #define RMASS_MASK 512
337 #define RADIUS_MASK 1024
338 #define DENSITY_MASK 2048
339 #define OMEGA_MASK 4096
340 #define TORQUE_MASK 8192
341 
342 
343 
344 #endif // #ifdef _CUDA_COMMON_H_
345