/***************************************************************************
                               lj_coul_msm.cpp
                             -------------------
                            Trung Dac Nguyen (ORNL)

  Class for acceleration of the lj/cut/coul/msm pair style.

 __________________________________________________________________________
    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
 __________________________________________________________________________

    begin                :
    email                : brownw@ornl.gov
 ***************************************************************************/

#if defined(USE_OPENCL)
#include "lj_coul_msm_cl.h"
#elif defined(USE_CUDART)
const char *lj_coul_msm=0;
#else
#include "lj_coul_msm_cubin.h"
#endif

#include "lal_lj_coul_msm.h"
#include <cassert>
namespace LAMMPS_AL {
#define LJCoulMSMT LJCoulMSM<numtyp, acctyp>

extern Device<PRECISION,ACC_PRECISION> device;

template <class numtyp, class acctyp>
LJCoulMSMT::LJCoulMSM() : BaseCharge<numtyp,acctyp>(),
                          _allocated(false) {
}

template <class numtyp, class acctyp>
LJCoulMSMT::~LJCoulMSM() {
  clear();
}

template <class numtyp, class acctyp>
int LJCoulMSMT::bytes_per_atom(const int max_nbors) const {
  return this->bytes_per_atom_atomic(max_nbors);
}

template <class numtyp, class acctyp>
int LJCoulMSMT::init(const int ntypes,
                     double **host_cutsq, double **host_lj1,
                     double **host_lj2, double **host_lj3,
                     double **host_lj4, double **host_gcons,
                     double **host_dgcons, double **host_offset,
                     double *host_special_lj, const int nlocal,
                     const int nall, const int max_nbors,
                     const int maxspecial, const double cell_size,
                     const double gpu_split, FILE *_screen,
                     double **host_cut_ljsq, const double host_cut_coulsq,
                     double *host_special_coul, const int order,
                     const double qqrd2e) {
  int success;
  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
                            _screen,lj_coul_msm,"k_lj_coul_msm");
  if (success!=0)
    return success;

  // If atom type constants fit in shared memory use fast kernel
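  // (when they fit, lj_types is padded up to max_shared_types below so the
  //  shared-memory kernel can assume a fixed stride for its coefficient
  //  tiles)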
  int lj_types=ntypes;
  shared_types=false;
  int max_shared_types=this->device->max_shared_types();
  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
    lj_types=max_shared_types;
    shared_types=true;
  }
  _lj_types=lj_types;

  // Allocate a host write buffer for data initialization
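  // (the buffer is oversized relative to a single pack so the same
  //  allocation can be reused for both numtyp4 type packs and the 8-element
  //  sp_lj copy below)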
  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
                               UCL_WRITE_ONLY);

  for (int i=0; i<lj_types*lj_types; i++)
    host_write[i]=0.0;

  lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
  this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2,
                         host_cutsq, host_cut_ljsq);

  lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
  this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4,
                         host_offset);

  // pack gcons and dgcons
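  // (gcons holds the polynomial coefficients of the MSM gamma function and
  //  dgcons their derivatives; the host arrays are assumed to be the 7x7 and
  //  7x6 tables set up on the host side, copied here into flat, texture-bound
  //  device arrays)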
  int nrows, ncols;
  nrows = 7;
  ncols = 7;
  UCL_H_Vec<numtyp> dview_gcons(nrows*ncols,*(this->ucl_device),
                                UCL_WRITE_ONLY);

  for (int ix=0; ix<nrows; ix++)
    for (int iy=0; iy<ncols; iy++)
      dview_gcons[ix*ncols+iy]=host_gcons[ix][iy];

  gcons.alloc(nrows*ncols,*(this->ucl_device),UCL_READ_ONLY);
  ucl_copy(gcons,dview_gcons,false);
  gcons_tex.get_texture(*(this->pair_program),"gcons_tex");
  gcons_tex.bind_float(gcons,1);

  nrows = 7;
  ncols = 6;
  UCL_H_Vec<numtyp> dview_dgcons(nrows*ncols,*(this->ucl_device),
                                 UCL_WRITE_ONLY);

  for (int ix=0; ix<nrows; ix++)
    for (int iy=0; iy<ncols; iy++)
      dview_dgcons[ix*ncols+iy]=host_dgcons[ix][iy];

  dgcons.alloc(nrows*ncols,*(this->ucl_device),UCL_READ_ONLY);
  ucl_copy(dgcons,dview_dgcons,false);
  dgcons_tex.get_texture(*(this->pair_program),"dgcons_tex");
  dgcons_tex.bind_float(dgcons,1);

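  // Pack the special_lj and special_coul bond-exclusion scaling factors into
  // a single 8-element device array (LJ factors in slots 0-3, Coulomb in 4-7)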
  sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
  for (int i=0; i<4; i++) {
    host_write[i]=host_special_lj[i];
    host_write[i+4]=host_special_coul[i];
  }
  ucl_copy(sp_lj,host_write,8,false);

  _cut_coulsq=host_cut_coulsq;
  _qqrd2e=qqrd2e;
  _order=order;

  _allocated=true;
  this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+
    gcons.row_bytes()+dgcons.row_bytes()+sp_lj.row_bytes();
  return 0;
}

template <class numtyp, class acctyp>
void LJCoulMSMT::clear() {
  if (!_allocated)
    return;
  _allocated=false;

  lj1.clear();
  lj3.clear();
  gcons.clear();
  dgcons.clear();
  sp_lj.clear();
  this->clear_atomic();
}

template <class numtyp, class acctyp>
double LJCoulMSMT::host_memory_usage() const {
  return this->host_memory_usage_atomic()+sizeof(LJCoulMSM<numtyp,acctyp>);
}

// ---------------------------------------------------------------------------
// Calculate energies, forces, and virials
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int LJCoulMSMT::loop(const int eflag, const int vflag) {
  // Compute the block size and grid size to keep all cores busy
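  // (GX = ceil(inum / (BX / _threads_per_atom)); e.g. with BX=128 and
  //  4 threads per atom, each block handles 32 atoms)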
  const int BX=this->block_size();
  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
                               (BX/this->_threads_per_atom)));

  int ainum=this->ans->inum();
  int nbor_pitch=this->nbor->nbor_pitch();
  this->time_pair.start();
  if (shared_types) {
    this->k_pair_sel->set_size(GX,BX);
    this->k_pair_sel->run(&this->atom->x, &lj1, &lj3, &gcons, &dgcons, &sp_lj,
                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                          &this->ans->force, &this->ans->engv, &eflag,
                          &vflag, &ainum, &nbor_pitch, &this->atom->q,
                          &_cut_coulsq, &_qqrd2e, &_order,
                          &this->_threads_per_atom);
  } else {
    this->k_pair.set_size(GX,BX);
    this->k_pair.run(&this->atom->x, &lj1, &lj3, &gcons, &dgcons,
                     &_lj_types, &sp_lj, &this->nbor->dev_nbor,
                     &this->_nbor_data->begin(), &this->ans->force,
                     &this->ans->engv, &eflag, &vflag, &ainum,
                     &nbor_pitch, &this->atom->q, &_cut_coulsq,
                     &_qqrd2e, &_order, &this->_threads_per_atom);
  }
  this->time_pair.stop();
  return GX;
}

template class LJCoulMSM<PRECISION,ACC_PRECISION>;
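
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of this translation unit): the
// host-side interface typically keeps one static instance of this class,
// calls init() once with the coefficient tables from the CPU pair style, and
// then drives the BaseCharge compute() entry points each timestep, which in
// turn call loop() above.  The instance and argument names below are
// hypothetical.
//
//   static LJCoulMSM<PRECISION,ACC_PRECISION> LJCMMF;
//
//   LJCMMF.init(ntypes, cutsq, lj1, lj2, lj3, lj4, gcons, dgcons, offset,
//               special_lj, inum, nall, max_nbors, maxspecial, cell_size,
//               gpu_split, screen, cut_ljsq, cut_coulsq, special_coul,
//               order, qqrd2e);
//
//   // each timestep: LJCMMF.compute(...);   at teardown: LJCMMF.clear();
// ---------------------------------------------------------------------------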
}