1 /***************************************************************************
2 lj_coul_msm.cpp
3 -------------------
4 Trung Dac Nguyen (ORNL)
5
6 Class for acceleration of the lj/cut/coul/msm pair style.
7
8 __________________________________________________________________________
9 This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
10 __________________________________________________________________________
11
12 begin :
13 email : brownw@ornl.gov
14 ***************************************************************************/
15
16 #if defined(USE_OPENCL)
17 #include "lj_coul_msm_cl.h"
18 #elif defined(USE_CUDART)
19 const char *lj_coul_msm=0;
20 #else
21 #include "lj_coul_msm_cubin.h"
22 #endif
23
24 #include "lal_lj_coul_msm.h"
25 #include <cassert>
26 namespace LAMMPS_AL {
27 #define LJCoulMSMT LJCoulMSM<numtyp, acctyp>
28
29 extern Device<PRECISION,ACC_PRECISION> device;
30
31 template <class numtyp, class acctyp>
LJCoulMSM()32 LJCoulMSMT::LJCoulMSM() : BaseCharge<numtyp,acctyp>(),
33 _allocated(false) {
34 }
35
36 template <class numtyp, class acctyp>
~LJCoulMSM()37 LJCoulMSMT::~LJCoulMSM() {
38 clear();
39 }
40
41 template <class numtyp, class acctyp>
bytes_per_atom(const int max_nbors) const42 int LJCoulMSMT::bytes_per_atom(const int max_nbors) const {
43 return this->bytes_per_atom_atomic(max_nbors);
44 }
45
46 template <class numtyp, class acctyp>
init(const int ntypes,double ** host_cutsq,double ** host_lj1,double ** host_lj2,double ** host_lj3,double ** host_lj4,double ** host_gcons,double ** host_dgcons,double ** host_offset,double * host_special_lj,const int nlocal,const int nall,const int max_nbors,const int maxspecial,const double cell_size,const double gpu_split,FILE * _screen,double ** host_cut_ljsq,const double host_cut_coulsq,double * host_special_coul,const int order,const double qqrd2e)47 int LJCoulMSMT::init(const int ntypes,
48 double **host_cutsq, double **host_lj1,
49 double **host_lj2, double **host_lj3,
50 double **host_lj4, double **host_gcons,
51 double **host_dgcons, double **host_offset,
52 double *host_special_lj, const int nlocal,
53 const int nall, const int max_nbors,
54 const int maxspecial, const double cell_size,
55 const double gpu_split, FILE *_screen,
56 double **host_cut_ljsq, const double host_cut_coulsq,
57 double *host_special_coul, const int order,
58 const double qqrd2e) {
59 int success;
60 success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
61 _screen,lj_coul_msm,"k_lj_coul_msm");
62 if (success!=0)
63 return success;
64
65 // If atom type constants fit in shared memory use fast kernel
66 int lj_types=ntypes;
67 shared_types=false;
68 int max_shared_types=this->device->max_shared_types();
69 if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
70 lj_types=max_shared_types;
71 shared_types=true;
72 }
73 _lj_types=lj_types;
74
75 // Allocate a host write buffer for data initialization
76 UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
77 UCL_WRITE_ONLY);
78
79 for (int i=0; i<lj_types*lj_types; i++)
80 host_write[i]=0.0;
81
82 lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
83 this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2,
84 host_cutsq, host_cut_ljsq);
85
86 lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
87 this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4,
88 host_offset);
89
90 // pack gcons and dgcons
91 int nrows, ncols;
92 nrows = 7;
93 ncols = 7;
94 UCL_H_Vec<numtyp> dview_gcons(nrows*ncols,*(this->ucl_device),
95 UCL_WRITE_ONLY);
96
97 for (int ix=0; ix<nrows; ix++)
98 for (int iy=0; iy<ncols; iy++)
99 dview_gcons[ix*ncols+iy]=host_gcons[ix][iy];
100
101 gcons.alloc(nrows*ncols,*(this->ucl_device),UCL_READ_ONLY);
102 ucl_copy(gcons,dview_gcons,false);
103 gcons_tex.get_texture(*(this->pair_program),"gcons_tex");
104 gcons_tex.bind_float(gcons,1);
105
106 nrows = 7;
107 ncols = 6;
108 UCL_H_Vec<numtyp> dview_dgcons(nrows*ncols,*(this->ucl_device),
109 UCL_WRITE_ONLY);
110
111 for (int ix=0; ix<nrows; ix++)
112 for (int iy=0; iy<ncols; iy++)
113 dview_dgcons[ix*ncols+iy]=host_dgcons[ix][iy];
114
115 dgcons.alloc(nrows*ncols,*(this->ucl_device),UCL_READ_ONLY);
116 ucl_copy(dgcons,dview_dgcons,false);
117 dgcons_tex.get_texture(*(this->pair_program),"dgcons_tex");
118 dgcons_tex.bind_float(dgcons,1);
119
120 sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
121 for (int i=0; i<4; i++) {
122 host_write[i]=host_special_lj[i];
123 host_write[i+4]=host_special_coul[i];
124 }
125 ucl_copy(sp_lj,host_write,8,false);
126
127 _cut_coulsq=host_cut_coulsq;
128 _qqrd2e=qqrd2e;
129 _order=order;
130
131 _allocated=true;
132 this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+
133 gcons.row_bytes()+dgcons.row_bytes()+sp_lj.row_bytes();
134 return 0;
135 }
136
137 template <class numtyp, class acctyp>
clear()138 void LJCoulMSMT::clear() {
139 if (!_allocated)
140 return;
141 _allocated=false;
142
143 lj1.clear();
144 lj3.clear();
145 gcons.clear();
146 dgcons.clear();
147 sp_lj.clear();
148 this->clear_atomic();
149 }
150
151 template <class numtyp, class acctyp>
host_memory_usage() const152 double LJCoulMSMT::host_memory_usage() const {
153 return this->host_memory_usage_atomic()+sizeof(LJCoulMSM<numtyp,acctyp>);
154 }
155
156 // ---------------------------------------------------------------------------
157 // Calculate energies, forces, and torques
158 // ---------------------------------------------------------------------------
159 template <class numtyp, class acctyp>
loop(const int eflag,const int vflag)160 int LJCoulMSMT::loop(const int eflag, const int vflag) {
161 // Compute the block size and grid size to keep all cores busy
162 const int BX=this->block_size();
163 int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
164 (BX/this->_threads_per_atom)));
165
166 int ainum=this->ans->inum();
167 int nbor_pitch=this->nbor->nbor_pitch();
168 this->time_pair.start();
169 if (shared_types) {
170 this->k_pair_sel->set_size(GX,BX);
171 this->k_pair_sel->run(&this->atom->x, &lj1, &lj3, &gcons, &dgcons, &sp_lj,
172 &this->nbor->dev_nbor, &this->_nbor_data->begin(),
173 &this->ans->force, &this->ans->engv, &eflag,
174 &vflag, &ainum, &nbor_pitch, &this->atom->q,
175 &_cut_coulsq, &_qqrd2e, &_order,
176 &this->_threads_per_atom);
177 } else {
178 this->k_pair.set_size(GX,BX);
179 this->k_pair.run(&this->atom->x, &lj1, &lj3, &gcons, &dgcons,
180 &_lj_types, &sp_lj, &this->nbor->dev_nbor,
181 &this->_nbor_data->begin(), &this->ans->force,
182 &this->ans->engv, &eflag, &vflag, &ainum,
183 &nbor_pitch, &this->atom->q, &_cut_coulsq,
184 &_qqrd2e, &_order, &this->_threads_per_atom);
185 }
186 this->time_pair.stop();
187 return GX;
188 }
189
190 template class LJCoulMSM<PRECISION,ACC_PRECISION>;
191 }
192