1 // **************************************************************************
2 //                                   soft.cu
3 //                             -------------------
4 //                           Trung Dac Nguyen (ORNL)
5 //
6 //  Device code for acceleration of the soft pair style
7 //
8 // __________________________________________________________________________
9 //    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
10 // __________________________________________________________________________
11 //
12 //    begin                :
13 //    email                : nguyentd@ornl.gov
14 // ***************************************************************************
15 
16 #if defined(NV_KERNEL) || defined(USE_HIP)
17 #include "lal_aux_fun1.h"
18 #ifndef _DOUBLE_DOUBLE
19 _texture( pos_tex,float4);
20 #else
21 _texture_2d( pos_tex,int4);
22 #endif
23 #else
24 #define pos_tex x_
25 #endif
26 
27 #define MY_PI (acctyp)3.14159265358979323846
28 
k_soft(const __global numtyp4 * restrict x_,const __global numtyp4 * restrict coeff,const int lj_types,const __global numtyp * restrict sp_lj_in,const __global int * dev_nbor,const __global int * dev_packed,__global acctyp4 * restrict ans,__global acctyp * restrict engv,const int eflag,const int vflag,const int inum,const int nbor_pitch,const int t_per_atom)29 __kernel void k_soft(const __global numtyp4 *restrict x_,
30                      const __global numtyp4 *restrict coeff,
31                      const int lj_types,
32                      const __global numtyp *restrict sp_lj_in,
33                      const __global int *dev_nbor,
34                      const __global int *dev_packed,
35                      __global acctyp4 *restrict ans,
36                      __global acctyp *restrict engv,
37                      const int eflag, const int vflag, const int inum,
38                      const int nbor_pitch, const int t_per_atom) {
39   int tid, ii, offset;
40   atom_info(t_per_atom,ii,tid,offset);
41 
42   __local numtyp sp_lj[4];
43   int n_stride;
44   local_allocate_store_pair();
45 
46   sp_lj[0]=sp_lj_in[0];
47   sp_lj[1]=sp_lj_in[1];
48   sp_lj[2]=sp_lj_in[2];
49   sp_lj[3]=sp_lj_in[3];
50 
51   acctyp4 f;
52   f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
53   acctyp energy, virial[6];
54   if (EVFLAG) {
55     energy=(acctyp)0;
56     for (int i=0; i<6; i++) virial[i]=(acctyp)0;
57   }
58 
59   if (ii<inum) {
60     int nbor, nbor_end;
61     int i, numj;
62     nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
63               n_stride,nbor_end,nbor);
64 
65     numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
66     int itype=ix.w;
67 
68     numtyp factor_lj;
69     for ( ; nbor<nbor_end; nbor+=n_stride) {
70 
71       int j=dev_packed[nbor];
72       factor_lj = sp_lj[sbmask(j)];
73       j &= NEIGHMASK;
74 
75       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
76       int jtype=jx.w;
77 
78       // Compute r12
79       numtyp delx = ix.x-jx.x;
80       numtyp dely = ix.y-jx.y;
81       numtyp delz = ix.z-jx.z;
82       numtyp rsq = delx*delx+dely*dely+delz*delz;
83 
84       int mtype=itype*lj_types+jtype;
85       if (rsq<coeff[mtype].z) {
86         numtyp force;
87         numtyp r = ucl_sqrt(rsq);
88         numtyp arg = MY_PI*r/coeff[mtype].y;
89         if (r > (numtyp)0.0) force = factor_lj * coeff[mtype].x *
90                        sin(arg) * MY_PI/coeff[mtype].y*ucl_recip(r);
91         else force = (numtyp)0.0;
92 
93         f.x+=delx*force;
94         f.y+=dely*force;
95         f.z+=delz*force;
96 
97         if (EVFLAG && eflag) {
98           numtyp e=coeff[mtype].x * ((numtyp)1.0+cos(arg));
99           energy+=factor_lj*e;
100         }
101         if (EVFLAG && vflag) {
102           virial[0] += delx*delx*force;
103           virial[1] += dely*dely*force;
104           virial[2] += delz*delz*force;
105           virial[3] += delx*dely*force;
106           virial[4] += delx*delz*force;
107           virial[5] += dely*delz*force;
108         }
109       }
110 
111     } // for nbor
112   } // if ii
113   store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
114                 ans,engv);
115 }
116 
k_soft_fast(const __global numtyp4 * restrict x_,const __global numtyp4 * restrict coeff_in,const __global numtyp * restrict sp_lj_in,const __global int * dev_nbor,const __global int * dev_packed,__global acctyp4 * restrict ans,__global acctyp * restrict engv,const int eflag,const int vflag,const int inum,const int nbor_pitch,const int t_per_atom)117 __kernel void k_soft_fast(const __global numtyp4 *restrict x_,
118                           const __global numtyp4 *restrict coeff_in,
119                           const __global numtyp *restrict sp_lj_in,
120                           const __global int *dev_nbor,
121                           const __global int *dev_packed,
122                           __global acctyp4 *restrict ans,
123                           __global acctyp *restrict engv,
124                           const int eflag, const int vflag, const int inum,
125                           const int nbor_pitch, const int t_per_atom) {
126   int tid, ii, offset;
127   atom_info(t_per_atom,ii,tid,offset);
128 
129   __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
130   __local numtyp sp_lj[4];
131   int n_stride;
132   local_allocate_store_pair();
133 
134   if (tid<4)
135     sp_lj[tid]=sp_lj_in[tid];
136   if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
137     coeff[tid]=coeff_in[tid];
138   }
139 
140   acctyp4 f;
141   f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
142   acctyp energy, virial[6];
143   if (EVFLAG) {
144     energy=(acctyp)0;
145     for (int i=0; i<6; i++) virial[i]=(acctyp)0;
146   }
147 
148   __syncthreads();
149 
150   if (ii<inum) {
151     int nbor, nbor_end;
152     int i, numj;
153     nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
154               n_stride,nbor_end,nbor);
155 
156     numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
157     int iw=ix.w;
158     int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
159 
160     numtyp factor_lj;
161     for ( ; nbor<nbor_end; nbor+=n_stride) {
162 
163       int j=dev_packed[nbor];
164       factor_lj = sp_lj[sbmask(j)];
165       j &= NEIGHMASK;
166 
167       numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
168       int mtype=itype+jx.w;
169 
170       // Compute r12
171       numtyp delx = ix.x-jx.x;
172       numtyp dely = ix.y-jx.y;
173       numtyp delz = ix.z-jx.z;
174       numtyp rsq = delx*delx+dely*dely+delz*delz;
175 
176       if (rsq<coeff[mtype].z) {
177         numtyp force;
178         numtyp r = ucl_sqrt(rsq);
179         numtyp arg = MY_PI*r/coeff[mtype].y;
180         if (r > (numtyp)0.0) force = factor_lj * coeff[mtype].x *
181                        sin(arg) * MY_PI/coeff[mtype].y*ucl_recip(r);
182         else force = (numtyp)0.0;
183 
184         f.x+=delx*force;
185         f.y+=dely*force;
186         f.z+=delz*force;
187 
188         if (EVFLAG && eflag) {
189           numtyp e=coeff[mtype].x * ((numtyp)1.0+cos(arg));
190           energy+=factor_lj*e;
191         }
192         if (EVFLAG && vflag) {
193           virial[0] += delx*delx*force;
194           virial[1] += dely*dely*force;
195           virial[2] += delz*delz*force;
196           virial[3] += delx*dely*force;
197           virial[4] += delx*delz*force;
198           virial[5] += dely*delz*force;
199         }
200       }
201 
202     } // for nbor
203   } // if ii
204   store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
205                 ans,engv);
206 }
207 
208