1 /***************************************************************************
2 lj_dsf.cpp
3 -------------------
4 W. Michael Brown (ORNL)
5
6 Class for acceleration of the lj/cut/coul/dsf pair style.
7
8 __________________________________________________________________________
9 This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
10 __________________________________________________________________________
11
12 begin : 7/12/2012
13 email : brownw@ornl.gov
14 ***************************************************************************/
15
16 #if defined(USE_OPENCL)
17 #include "lj_dsf_cl.h"
18 #elif defined(USE_CUDART)
19 const char *lj_dsf=0;
20 #else
21 #include "lj_dsf_cubin.h"
22 #endif
23
24 #include "lal_lj_dsf.h"
25 #include <cassert>
26 using namespace LAMMPS_AL;
// Shorthand for the templated class name, used to qualify the out-of-class
// member definitions in this file.
#define LJDSFT LJDSF<numtyp, acctyp>

// Global device object; declared here, defined in another translation unit.
extern Device<PRECISION,ACC_PRECISION> device;
30
31 template <class numtyp, class acctyp>
LJDSF()32 LJDSFT::LJDSF() : BaseCharge<numtyp,acctyp>(),
33 _allocated(false) {
34 }
35
36 template <class numtyp, class acctyp>
~LJDSF()37 LJDSFT::~LJDSF() {
38 clear();
39 }
40
41 template <class numtyp, class acctyp>
bytes_per_atom(const int max_nbors) const42 int LJDSFT::bytes_per_atom(const int max_nbors) const {
43 return this->bytes_per_atom_atomic(max_nbors);
44 }
45
46 template <class numtyp, class acctyp>
init(const int ntypes,double ** host_cutsq,double ** host_lj1,double ** host_lj2,double ** host_lj3,double ** host_lj4,double ** host_offset,double * host_special_lj,const int nlocal,const int nall,const int max_nbors,const int maxspecial,const double cell_size,const double gpu_split,FILE * _screen,double ** host_cut_ljsq,const double host_cut_coulsq,double * host_special_coul,const double qqrd2e,const double e_shift,const double f_shift,const double alpha)47 int LJDSFT::init(const int ntypes, double **host_cutsq, double **host_lj1,
48 double **host_lj2, double **host_lj3, double **host_lj4,
49 double **host_offset, double *host_special_lj,
50 const int nlocal, const int nall, const int max_nbors,
51 const int maxspecial, const double cell_size,
52 const double gpu_split, FILE *_screen,
53 double **host_cut_ljsq, const double host_cut_coulsq,
54 double *host_special_coul, const double qqrd2e,
55 const double e_shift, const double f_shift,
56 const double alpha) {
57 int success;
58 success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
59 _screen,lj_dsf,"k_lj_dsf");
60 if (success!=0)
61 return success;
62
63 _cut_coulsq=host_cut_coulsq;
64 _e_shift=e_shift;
65 _f_shift=f_shift;
66 _alpha=alpha;
67
68 // If atom type constants fit in shared memory use fast kernel
69 int lj_types=ntypes;
70 shared_types=false;
71 int max_shared_types=this->device->max_shared_types();
72 if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
73 lj_types=max_shared_types;
74 shared_types=true;
75 }
76 _lj_types=lj_types;
77
78 // Allocate a host write buffer for data initialization
79 UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
80 UCL_WRITE_ONLY);
81
82 for (int i=0; i<lj_types*lj_types; i++)
83 host_write[i]=0.0;
84
85 lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
86 this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2,
87 host_cut_ljsq, host_cutsq);
88
89 lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
90 this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4,
91 host_offset);
92
93 sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
94 for (int i=0; i<4; i++) {
95 host_write[i]=host_special_lj[i];
96 host_write[i+4]=host_special_coul[i];
97 }
98 ucl_copy(sp_lj,host_write,8,false);
99
100 _qqrd2e=qqrd2e;
101
102 _allocated=true;
103 this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes();
104 return 0;
105 }
106
107 template <class numtyp, class acctyp>
clear()108 void LJDSFT::clear() {
109 if (!_allocated)
110 return;
111 _allocated=false;
112
113 lj1.clear();
114 lj3.clear();
115 sp_lj.clear();
116 this->clear_atomic();
117 }
118
119 template <class numtyp, class acctyp>
host_memory_usage() const120 double LJDSFT::host_memory_usage() const {
121 return this->host_memory_usage_atomic()+sizeof(LJDSF<numtyp,acctyp>);
122 }
123
124 // ---------------------------------------------------------------------------
125 // Calculate energies, forces, and torques
126 // ---------------------------------------------------------------------------
127 template <class numtyp, class acctyp>
loop(const bool _eflag,const bool _vflag)128 void LJDSFT::loop(const bool _eflag, const bool _vflag) {
129 // Compute the block size and grid size to keep all cores busy
130 const int BX=this->block_size();
131 int eflag, vflag;
132 if (_eflag)
133 eflag=1;
134 else
135 eflag=0;
136
137 if (_vflag)
138 vflag=1;
139 else
140 vflag=0;
141
142 int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
143 (BX/this->_threads_per_atom)));
144
145 int ainum=this->ans->inum();
146 int nbor_pitch=this->nbor->nbor_pitch();
147 this->time_pair.start();
148 if (shared_types) {
149 this->k_pair_fast.set_size(GX,BX);
150 this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &sp_lj,
151 &this->nbor->dev_nbor, &this->_nbor_data->begin(),
152 &this->ans->force, &this->ans->engv, &eflag,
153 &vflag, &ainum, &nbor_pitch, &this->atom->q,
154 &_cut_coulsq, &_qqrd2e, &_e_shift, &_f_shift, &_alpha,
155 &this->_threads_per_atom);
156 } else {
157 this->k_pair.set_size(GX,BX);
158 this->k_pair.run(&this->atom->x, &lj1, &lj3, &_lj_types, &sp_lj,
159 &this->nbor->dev_nbor, &this->_nbor_data->begin(),
160 &this->ans->force, &this->ans->engv,
161 &eflag, &vflag, &ainum, &nbor_pitch, &this->atom->q,
162 &_cut_coulsq, &_qqrd2e, &_e_shift, &_f_shift, &_alpha,
163 &this->_threads_per_atom);
164 }
165 this->time_pair.stop();
166 }
167
// Explicit instantiation for the precision pair selected by the PRECISION and
// ACC_PRECISION macros, so the member definitions above are emitted in this
// translation unit.
template class LJDSF<PRECISION,ACC_PRECISION>;
169