1 // **************************************************************************
2 // soft.cu
3 // -------------------
4 // Trung Dac Nguyen (ORNL)
5 //
6 // Device code for acceleration of the soft pair style
7 //
8 // __________________________________________________________________________
9 // This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
10 // __________________________________________________________________________
11 //
12 // begin :
13 // email : nguyentd@ornl.gov
14 // ***************************************************************************
15
16 #if defined(NV_KERNEL) || defined(USE_HIP)
17 #include "lal_aux_fun1.h"
18 #ifndef _DOUBLE_DOUBLE
19 _texture( pos_tex,float4);
20 #else
21 _texture_2d( pos_tex,int4);
22 #endif
23 #else
24 #define pos_tex x_
25 #endif
26
27 #define MY_PI (acctyp)3.14159265358979323846
28
k_soft(const __global numtyp4 * restrict x_,const __global numtyp4 * restrict coeff,const int lj_types,const __global numtyp * restrict sp_lj_in,const __global int * dev_nbor,const __global int * dev_packed,__global acctyp4 * restrict ans,__global acctyp * restrict engv,const int eflag,const int vflag,const int inum,const int nbor_pitch,const int t_per_atom)29 __kernel void k_soft(const __global numtyp4 *restrict x_,
30 const __global numtyp4 *restrict coeff,
31 const int lj_types,
32 const __global numtyp *restrict sp_lj_in,
33 const __global int *dev_nbor,
34 const __global int *dev_packed,
35 __global acctyp4 *restrict ans,
36 __global acctyp *restrict engv,
37 const int eflag, const int vflag, const int inum,
38 const int nbor_pitch, const int t_per_atom) {
39 int tid, ii, offset;
40 atom_info(t_per_atom,ii,tid,offset);
41
42 __local numtyp sp_lj[4];
43 int n_stride;
44 local_allocate_store_pair();
45
46 sp_lj[0]=sp_lj_in[0];
47 sp_lj[1]=sp_lj_in[1];
48 sp_lj[2]=sp_lj_in[2];
49 sp_lj[3]=sp_lj_in[3];
50
51 acctyp4 f;
52 f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
53 acctyp energy, virial[6];
54 if (EVFLAG) {
55 energy=(acctyp)0;
56 for (int i=0; i<6; i++) virial[i]=(acctyp)0;
57 }
58
59 if (ii<inum) {
60 int nbor, nbor_end;
61 int i, numj;
62 nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
63 n_stride,nbor_end,nbor);
64
65 numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
66 int itype=ix.w;
67
68 numtyp factor_lj;
69 for ( ; nbor<nbor_end; nbor+=n_stride) {
70
71 int j=dev_packed[nbor];
72 factor_lj = sp_lj[sbmask(j)];
73 j &= NEIGHMASK;
74
75 numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
76 int jtype=jx.w;
77
78 // Compute r12
79 numtyp delx = ix.x-jx.x;
80 numtyp dely = ix.y-jx.y;
81 numtyp delz = ix.z-jx.z;
82 numtyp rsq = delx*delx+dely*dely+delz*delz;
83
84 int mtype=itype*lj_types+jtype;
85 if (rsq<coeff[mtype].z) {
86 numtyp force;
87 numtyp r = ucl_sqrt(rsq);
88 numtyp arg = MY_PI*r/coeff[mtype].y;
89 if (r > (numtyp)0.0) force = factor_lj * coeff[mtype].x *
90 sin(arg) * MY_PI/coeff[mtype].y*ucl_recip(r);
91 else force = (numtyp)0.0;
92
93 f.x+=delx*force;
94 f.y+=dely*force;
95 f.z+=delz*force;
96
97 if (EVFLAG && eflag) {
98 numtyp e=coeff[mtype].x * ((numtyp)1.0+cos(arg));
99 energy+=factor_lj*e;
100 }
101 if (EVFLAG && vflag) {
102 virial[0] += delx*delx*force;
103 virial[1] += dely*dely*force;
104 virial[2] += delz*delz*force;
105 virial[3] += delx*dely*force;
106 virial[4] += delx*delz*force;
107 virial[5] += dely*delz*force;
108 }
109 }
110
111 } // for nbor
112 } // if ii
113 store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
114 ans,engv);
115 }
116
k_soft_fast(const __global numtyp4 * restrict x_,const __global numtyp4 * restrict coeff_in,const __global numtyp * restrict sp_lj_in,const __global int * dev_nbor,const __global int * dev_packed,__global acctyp4 * restrict ans,__global acctyp * restrict engv,const int eflag,const int vflag,const int inum,const int nbor_pitch,const int t_per_atom)117 __kernel void k_soft_fast(const __global numtyp4 *restrict x_,
118 const __global numtyp4 *restrict coeff_in,
119 const __global numtyp *restrict sp_lj_in,
120 const __global int *dev_nbor,
121 const __global int *dev_packed,
122 __global acctyp4 *restrict ans,
123 __global acctyp *restrict engv,
124 const int eflag, const int vflag, const int inum,
125 const int nbor_pitch, const int t_per_atom) {
126 int tid, ii, offset;
127 atom_info(t_per_atom,ii,tid,offset);
128
129 __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
130 __local numtyp sp_lj[4];
131 int n_stride;
132 local_allocate_store_pair();
133
134 if (tid<4)
135 sp_lj[tid]=sp_lj_in[tid];
136 if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
137 coeff[tid]=coeff_in[tid];
138 }
139
140 acctyp4 f;
141 f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
142 acctyp energy, virial[6];
143 if (EVFLAG) {
144 energy=(acctyp)0;
145 for (int i=0; i<6; i++) virial[i]=(acctyp)0;
146 }
147
148 __syncthreads();
149
150 if (ii<inum) {
151 int nbor, nbor_end;
152 int i, numj;
153 nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
154 n_stride,nbor_end,nbor);
155
156 numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
157 int iw=ix.w;
158 int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
159
160 numtyp factor_lj;
161 for ( ; nbor<nbor_end; nbor+=n_stride) {
162
163 int j=dev_packed[nbor];
164 factor_lj = sp_lj[sbmask(j)];
165 j &= NEIGHMASK;
166
167 numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
168 int mtype=itype+jx.w;
169
170 // Compute r12
171 numtyp delx = ix.x-jx.x;
172 numtyp dely = ix.y-jx.y;
173 numtyp delz = ix.z-jx.z;
174 numtyp rsq = delx*delx+dely*dely+delz*delz;
175
176 if (rsq<coeff[mtype].z) {
177 numtyp force;
178 numtyp r = ucl_sqrt(rsq);
179 numtyp arg = MY_PI*r/coeff[mtype].y;
180 if (r > (numtyp)0.0) force = factor_lj * coeff[mtype].x *
181 sin(arg) * MY_PI/coeff[mtype].y*ucl_recip(r);
182 else force = (numtyp)0.0;
183
184 f.x+=delx*force;
185 f.y+=dely*force;
186 f.z+=delz*force;
187
188 if (EVFLAG && eflag) {
189 numtyp e=coeff[mtype].x * ((numtyp)1.0+cos(arg));
190 energy+=factor_lj*e;
191 }
192 if (EVFLAG && vflag) {
193 virial[0] += delx*delx*force;
194 virial[1] += dely*dely*force;
195 virial[2] += delz*delz*force;
196 virial[3] += delx*dely*force;
197 virial[4] += delx*delz*force;
198 virial[5] += dely*delz*force;
199 }
200 }
201
202 } // for nbor
203 } // if ii
204 store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
205 ans,engv);
206 }
207
208