1 /***************************************************************************
2 charmm_long.cpp
3 -------------------
4 W. Michael Brown (ORNL)
5
6 Class for acceleration of the charmm/coul/long pair style.
7
8 __________________________________________________________________________
9 This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
10 __________________________________________________________________________
11
12 begin :
13 email : brownw@ornl.gov
14 ***************************************************************************/
15
16 #if defined(USE_OPENCL)
17 #include "charmm_long_cl.h"
18 #elif defined(USE_CUDART)
19 const char *charmm_long=0;
20 #else
21 #include "charmm_long_cubin.h"
22 #endif
23
24 #include "lal_charmm_long.h"
25 #include <cassert>
26 using namespace LAMMPS_AL;
27 #define CHARMMLongT CHARMMLong<numtyp, acctyp>
28
29 extern Device<PRECISION,ACC_PRECISION> device;
30
31 template <class numtyp, class acctyp>
CHARMMLong()32 CHARMMLongT::CHARMMLong() : BaseCharge<numtyp,acctyp>(),
33 _allocated(false) {
34 }
35
36 template <class numtyp, class acctyp>
~CHARMMLong()37 CHARMMLongT::~CHARMMLong() {
38 clear();
39 }
40
41 template <class numtyp, class acctyp>
bytes_per_atom(const int max_nbors) const42 int CHARMMLongT::bytes_per_atom(const int max_nbors) const {
43 return this->bytes_per_atom_atomic(max_nbors);
44 }
45
46 template <class numtyp, class acctyp>
init(const int ntypes,double host_cut_bothsq,double ** host_lj1,double ** host_lj2,double ** host_lj3,double ** host_lj4,double ** host_offset,double * host_special_lj,const int nlocal,const int nall,const int max_nbors,const int maxspecial,const double cell_size,const double gpu_split,FILE * _screen,double host_cut_ljsq,const double host_cut_coulsq,double * host_special_coul,const double qqrd2e,const double g_ewald,const double cut_lj_innersq,const double denom_lj,double ** epsilon,double ** sigma,const bool mix_arithmetic)47 int CHARMMLongT::init(const int ntypes,
48 double host_cut_bothsq, double **host_lj1,
49 double **host_lj2, double **host_lj3,
50 double **host_lj4, double **host_offset,
51 double *host_special_lj, const int nlocal,
52 const int nall, const int max_nbors,
53 const int maxspecial, const double cell_size,
54 const double gpu_split, FILE *_screen,
55 double host_cut_ljsq, const double host_cut_coulsq,
56 double *host_special_coul, const double qqrd2e,
57 const double g_ewald, const double cut_lj_innersq,
58 const double denom_lj, double **epsilon,
59 double **sigma, const bool mix_arithmetic) {
60 int success;
61 success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
62 _screen,charmm_long,"k_charmm_long");
63 if (success!=0)
64 return success;
65
66 // If atom type constants fit in shared memory use fast kernel
67 int lj_types=ntypes;
68 shared_types=false;
69 if (this->_block_bio_size>=64 && mix_arithmetic)
70 shared_types=true;
71 _lj_types=lj_types;
72
73 // Allocate a host write buffer for data initialization
74 int h_size=lj_types*lj_types;
75 int max_bio_shared_types=this->device->max_bio_shared_types();
76 if (h_size<max_bio_shared_types)
77 h_size=max_bio_shared_types;
78 UCL_H_Vec<numtyp> host_write(h_size*32,*(this->ucl_device),
79 UCL_WRITE_ONLY);
80 for (int i=0; i<h_size*32; i++)
81 host_write[i]=0.0;
82
83 lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
84 this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2,
85 host_lj3,host_lj4);
86
87 ljd.alloc(max_bio_shared_types,*(this->ucl_device),UCL_READ_ONLY);
88 this->atom->self_pack2(ntypes,ljd,host_write,epsilon,sigma);
89
90 sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
91 for (int i=0; i<4; i++) {
92 host_write[i]=host_special_lj[i];
93 host_write[i+4]=host_special_coul[i];
94 }
95 ucl_copy(sp_lj,host_write,8,false);
96
97 _cut_bothsq = host_cut_bothsq;
98 _cut_coulsq = host_cut_coulsq;
99 _cut_ljsq = host_cut_ljsq;
100 _cut_lj_innersq = cut_lj_innersq;
101 _qqrd2e=qqrd2e;
102 _g_ewald=g_ewald;
103 _denom_lj=denom_lj;
104
105 _allocated=true;
106 this->_max_bytes=lj1.row_bytes()+ljd.row_bytes()+sp_lj.row_bytes();
107 return 0;
108 }
109
110 template <class numtyp, class acctyp>
clear()111 void CHARMMLongT::clear() {
112 if (!_allocated)
113 return;
114 _allocated=false;
115
116 lj1.clear();
117 ljd.clear();
118 sp_lj.clear();
119 this->clear_atomic();
120 }
121
122 template <class numtyp, class acctyp>
host_memory_usage() const123 double CHARMMLongT::host_memory_usage() const {
124 return this->host_memory_usage_atomic()+sizeof(CHARMMLong<numtyp,acctyp>);
125 }
126
127 // ---------------------------------------------------------------------------
128 // Calculate energies, forces, and torques
129 // ---------------------------------------------------------------------------
130 template <class numtyp, class acctyp>
loop(const bool _eflag,const bool _vflag)131 void CHARMMLongT::loop(const bool _eflag, const bool _vflag) {
132 // Compute the block size and grid size to keep all cores busy
133 const int BX=this->_block_bio_size;
134 int eflag, vflag;
135 if (_eflag)
136 eflag=1;
137 else
138 eflag=0;
139
140 if (_vflag)
141 vflag=1;
142 else
143 vflag=0;
144
145 int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
146 (BX/this->_threads_per_atom)));
147
148 int ainum=this->ans->inum();
149 int nbor_pitch=this->nbor->nbor_pitch();
150 this->time_pair.start();
151 if (shared_types) {
152 this->k_pair_fast.set_size(GX,BX);
153 this->k_pair_fast.run(&this->atom->x, &ljd, &sp_lj,
154 &this->nbor->dev_nbor, &this->_nbor_data->begin(),
155 &this->ans->force, &this->ans->engv, &eflag,
156 &vflag, &ainum, &nbor_pitch, &this->atom->q,
157 &_cut_coulsq, &_qqrd2e, &_g_ewald, &_denom_lj,
158 &_cut_bothsq, &_cut_ljsq, &_cut_lj_innersq,
159 &this->_threads_per_atom);
160 } else {
161 this->k_pair.set_size(GX,BX);
162 this->k_pair.run(&this->atom->x, &lj1, &_lj_types, &sp_lj,
163 &this->nbor->dev_nbor, &this->_nbor_data->begin(),
164 &this->ans->force, &this->ans->engv, &eflag, &vflag,
165 &ainum, &nbor_pitch, &this->atom->q,
166 &_cut_coulsq, &_qqrd2e, &_g_ewald, &_denom_lj,
167 &_cut_bothsq, &_cut_ljsq, &_cut_lj_innersq,
168 &this->_threads_per_atom);
169 }
170 this->time_pair.stop();
171 }
172
173 template class CHARMMLong<PRECISION,ACC_PRECISION>;
174