1 /***************************************************************************
2 sw.cpp
3 -------------------
4 W. Michael Brown (ORNL)
5
6 Class for acceleration of the sw pair style.
7
8 __________________________________________________________________________
9 This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
10 __________________________________________________________________________
11
12 begin : Tue March 26, 2013
13 email : brownw@ornl.gov
14 ***************************************************************************/
15
16 #if defined(USE_OPENCL)
17 #include "sw_cl.h"
18 #elif defined(USE_CUDART)
19 const char *lj=0;
20 #else
21 #include "sw_cubin.h"
22 #endif
23
24 #include "lal_sw.h"
25 #include <cassert>
// Bring the LAMMPS_AL names into scope for the definitions in this file.
using namespace LAMMPS_AL;
// Shorthand for the class name used to qualify each member definition below.
#define SWT SW<numtyp, acctyp>

// Declaration only: the object itself is defined outside this file.
extern Device<PRECISION,ACC_PRECISION> device;
30
31 template <class numtyp, class acctyp>
SW()32 SWT::SW() : BaseThree<numtyp,acctyp>(), _allocated(false) {
33 }
34
35 template <class numtyp, class acctyp>
~SW()36 SWT::~SW() {
37 clear();
38 }
39
40 template <class numtyp, class acctyp>
bytes_per_atom(const int max_nbors) const41 int SWT::bytes_per_atom(const int max_nbors) const {
42 return this->bytes_per_atom_atomic(max_nbors);
43 }
44
45 template <class numtyp, class acctyp>
init(const int nlocal,const int nall,const int max_nbors,const double cell_size,const double gpu_split,FILE * _screen,const double epsilon,const double sigma,const double lambda,const double gamma,const double costheta,const double biga,const double bigb,const double powerp,const double powerq,const double cut,const double cutsq)46 int SWT::init(const int nlocal, const int nall, const int max_nbors,
47 const double cell_size, const double gpu_split, FILE *_screen,
48 const double epsilon, const double sigma,
49 const double lambda, const double gamma,
50 const double costheta, const double biga,
51 const double bigb, const double powerp,
52 const double powerq, const double cut, const double cutsq) {
53
54 sw_epsilon=static_cast<numtyp>(epsilon);
55 sw_sigma=static_cast<numtyp>(sigma);
56 sw_lambda=static_cast<numtyp>(lambda);
57 sw_gamma=static_cast<numtyp>(gamma);
58 sw_costheta=static_cast<numtyp>(costheta);
59 sw_biga=static_cast<numtyp>(biga);
60 sw_bigb=static_cast<numtyp>(bigb);
61 sw_powerp=static_cast<numtyp>(powerp);
62 sw_powerq=static_cast<numtyp>(powerq);
63 sw_cut=static_cast<numtyp>(cut);
64 sw_cutsq=static_cast<numtyp>(cutsq);
65 if (sw_cutsq>=sw_cut*sw_cut)
66 sw_cutsq=sw_cut*sw_cut-1e-4;
67
68 int success;
69 success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split,
70 _screen,sw,"k_sw","k_sw_three_center",
71 "k_sw_three_end");
72 if (success!=0)
73 return success;
74
75 // If atom type constants fit in shared memory use fast kernel
76 shared_types=true;
77
78 _allocated=true;
79 this->_max_bytes=0;
80 return 0;
81 }
82
83 template <class numtyp, class acctyp>
clear()84 void SWT::clear() {
85 if (!_allocated)
86 return;
87 _allocated=false;
88
89 this->clear_atomic();
90 }
91
92 template <class numtyp, class acctyp>
host_memory_usage() const93 double SWT::host_memory_usage() const {
94 return this->host_memory_usage_atomic()+sizeof(SW<numtyp,acctyp>);
95 }
96
97 #define KTHREADS this->_threads_per_atom
98 #define JTHREADS this->_threads_per_atom
99 // ---------------------------------------------------------------------------
100 // Calculate energies, forces, and torques
101 // ---------------------------------------------------------------------------
102 template <class numtyp, class acctyp>
loop(const bool _eflag,const bool _vflag,const int evatom)103 void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
104 // Compute the block size and grid size to keep all cores busy
105 int BX=this->block_pair();
106 int eflag, vflag;
107 if (_eflag)
108 eflag=1;
109 else
110 eflag=0;
111
112 if (_vflag)
113 vflag=1;
114 else
115 vflag=0;
116
117 int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
118 (BX/this->_threads_per_atom)));
119
120 int ainum=this->ans->inum();
121 int nbor_pitch=this->nbor->nbor_pitch();
122 this->time_pair.start();
123 this->k_pair.set_size(GX,BX);
124 this->k_pair.run(&this->atom->x, &this->nbor->dev_nbor,
125 &this->_nbor_data->begin(), &this->ans->force,
126 &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch,
127 &this->_threads_per_atom, &sw_cut, &sw_epsilon, &sw_sigma,
128 &sw_biga, &sw_bigb, &sw_powerp, &sw_powerq, &sw_cutsq);
129
130 BX=this->block_size();
131 GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
132 (BX/(KTHREADS*JTHREADS))));
133 this->k_three_center.set_size(GX,BX);
134 this->k_three_center.run(&this->atom->x, &this->nbor->dev_nbor,
135 &this->_nbor_data->begin(), &this->ans->force,
136 &this->ans->engv, &eflag, &vflag, &ainum,
137 &nbor_pitch, &this->_threads_per_atom, &evatom,
138 &sw_cut, &sw_epsilon, &sw_sigma, &sw_lambda, &sw_gamma,
139 &sw_costheta, &sw_cutsq);
140 Answer<numtyp,acctyp> *end_ans;
141 #ifdef THREE_CONCURRENT
142 end_ans=this->ans2;
143 #else
144 end_ans=this->ans;
145 #endif
146 if (evatom!=0) {
147 this->k_three_end_vatom.set_size(GX,BX);
148 this->k_three_end_vatom.run(&this->atom->x, &this->nbor->dev_nbor,
149 &this->_nbor_data->begin(), &end_ans->force,
150 &end_ans->engv, &eflag, &vflag, &ainum,
151 &nbor_pitch, &this->_threads_per_atom, &sw_cut,
152 &sw_epsilon, &sw_sigma, &sw_lambda, &sw_gamma,
153 &sw_costheta, &sw_cutsq);
154 } else {
155 this->k_three_end.set_size(GX,BX);
156 this->k_three_end.run(&this->atom->x, &this->nbor->dev_nbor,
157 &this->_nbor_data->begin(), &end_ans->force,
158 &end_ans->engv, &eflag, &vflag, &ainum,
159 &nbor_pitch, &this->_threads_per_atom, &sw_cut,
160 &sw_epsilon, &sw_sigma, &sw_lambda, &sw_gamma,
161 &sw_costheta, &sw_cutsq);
162 }
163 this->time_pair.stop();
164 }
165
// Explicit instantiation for the precision pair chosen at build time, so the
// member definitions in this file are emitted in this translation unit.
template class SW<PRECISION,ACC_PRECISION>;
167
168