1 /***************************************************************************
2                                    sw.cpp
3                              -------------------
4                             W. Michael Brown (ORNL)
5 
6   Class for acceleration of the sw pair style.
7 
8  __________________________________________________________________________
9     This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
10  __________________________________________________________________________
11 
12     begin                : Tue March 26, 2013
13     email                : brownw@ornl.gov
14  ***************************************************************************/
15 
16 #if defined(USE_OPENCL)
17 #include "sw_cl.h"
18 #elif defined(USE_CUDART)
19 const char *lj=0;
20 #else
21 #include "sw_cubin.h"
22 #endif
23 
24 #include "lal_sw.h"
25 #include <cassert>
26 using namespace LAMMPS_AL;
// Shorthand for the fully qualified template class name, used when defining
// the member functions below.
#define SWT SW<numtyp, acctyp>

// Device object defined in another translation unit.
extern Device<PRECISION,ACC_PRECISION> device;
30 
31 template <class numtyp, class acctyp>
SW()32 SWT::SW() : BaseThree<numtyp,acctyp>(), _allocated(false) {
33 }
34 
35 template <class numtyp, class acctyp>
~SW()36 SWT::~SW() {
37   clear();
38 }
39 
40 template <class numtyp, class acctyp>
bytes_per_atom(const int max_nbors) const41 int SWT::bytes_per_atom(const int max_nbors) const {
42   return this->bytes_per_atom_atomic(max_nbors);
43 }
44 
45 template <class numtyp, class acctyp>
init(const int nlocal,const int nall,const int max_nbors,const double cell_size,const double gpu_split,FILE * _screen,const double epsilon,const double sigma,const double lambda,const double gamma,const double costheta,const double biga,const double bigb,const double powerp,const double powerq,const double cut,const double cutsq)46 int SWT::init(const int nlocal, const int nall, const int max_nbors,
47               const double cell_size, const double gpu_split, FILE *_screen,
48               const double epsilon, const double sigma,
49               const double lambda, const double gamma,
50               const double costheta, const double biga,
51               const double bigb, const double powerp,
52               const double powerq, const double cut, const double cutsq) {
53 
54   sw_epsilon=static_cast<numtyp>(epsilon);
55   sw_sigma=static_cast<numtyp>(sigma);
56   sw_lambda=static_cast<numtyp>(lambda);
57   sw_gamma=static_cast<numtyp>(gamma);
58   sw_costheta=static_cast<numtyp>(costheta);
59   sw_biga=static_cast<numtyp>(biga);
60   sw_bigb=static_cast<numtyp>(bigb);
61   sw_powerp=static_cast<numtyp>(powerp);
62   sw_powerq=static_cast<numtyp>(powerq);
63   sw_cut=static_cast<numtyp>(cut);
64   sw_cutsq=static_cast<numtyp>(cutsq);
65   if (sw_cutsq>=sw_cut*sw_cut)
66     sw_cutsq=sw_cut*sw_cut-1e-4;
67 
68   int success;
69   success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split,
70                            _screen,sw,"k_sw","k_sw_three_center",
71                            "k_sw_three_end");
72   if (success!=0)
73     return success;
74 
75   // If atom type constants fit in shared memory use fast kernel
76   shared_types=true;
77 
78   _allocated=true;
79   this->_max_bytes=0;
80   return 0;
81 }
82 
83 template <class numtyp, class acctyp>
clear()84 void SWT::clear() {
85   if (!_allocated)
86     return;
87   _allocated=false;
88 
89   this->clear_atomic();
90 }
91 
92 template <class numtyp, class acctyp>
host_memory_usage() const93 double SWT::host_memory_usage() const {
94   return this->host_memory_usage_atomic()+sizeof(SW<numtyp,acctyp>);
95 }
96 
// Thread-count aliases used when sizing the three-body kernel grid in loop().
// Both currently expand to the same member, _threads_per_atom.
#define KTHREADS this->_threads_per_atom
#define JTHREADS this->_threads_per_atom
99 // ---------------------------------------------------------------------------
100 // Calculate energies, forces, and torques
101 // ---------------------------------------------------------------------------
102 template <class numtyp, class acctyp>
loop(const bool _eflag,const bool _vflag,const int evatom)103 void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
104   // Compute the block size and grid size to keep all cores busy
105   int BX=this->block_pair();
106   int eflag, vflag;
107   if (_eflag)
108     eflag=1;
109   else
110     eflag=0;
111 
112   if (_vflag)
113     vflag=1;
114   else
115     vflag=0;
116 
117   int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
118                                (BX/this->_threads_per_atom)));
119 
120   int ainum=this->ans->inum();
121   int nbor_pitch=this->nbor->nbor_pitch();
122   this->time_pair.start();
123   this->k_pair.set_size(GX,BX);
124   this->k_pair.run(&this->atom->x, &this->nbor->dev_nbor,
125                    &this->_nbor_data->begin(), &this->ans->force,
126                    &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch,
127                    &this->_threads_per_atom, &sw_cut, &sw_epsilon, &sw_sigma,
128                    &sw_biga, &sw_bigb, &sw_powerp, &sw_powerq, &sw_cutsq);
129 
130   BX=this->block_size();
131   GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
132                            (BX/(KTHREADS*JTHREADS))));
133   this->k_three_center.set_size(GX,BX);
134   this->k_three_center.run(&this->atom->x, &this->nbor->dev_nbor,
135                    &this->_nbor_data->begin(), &this->ans->force,
136                    &this->ans->engv, &eflag, &vflag, &ainum,
137                    &nbor_pitch, &this->_threads_per_atom, &evatom,
138                    &sw_cut, &sw_epsilon, &sw_sigma, &sw_lambda, &sw_gamma,
139                    &sw_costheta, &sw_cutsq);
140   Answer<numtyp,acctyp> *end_ans;
141   #ifdef THREE_CONCURRENT
142   end_ans=this->ans2;
143   #else
144   end_ans=this->ans;
145   #endif
146   if (evatom!=0) {
147     this->k_three_end_vatom.set_size(GX,BX);
148     this->k_three_end_vatom.run(&this->atom->x, &this->nbor->dev_nbor,
149                           &this->_nbor_data->begin(), &end_ans->force,
150                           &end_ans->engv, &eflag, &vflag, &ainum,
151                           &nbor_pitch, &this->_threads_per_atom, &sw_cut,
152                           &sw_epsilon, &sw_sigma, &sw_lambda, &sw_gamma,
153                           &sw_costheta, &sw_cutsq);
154   } else {
155     this->k_three_end.set_size(GX,BX);
156     this->k_three_end.run(&this->atom->x, &this->nbor->dev_nbor,
157                           &this->_nbor_data->begin(), &end_ans->force,
158                           &end_ans->engv, &eflag, &vflag, &ainum,
159                           &nbor_pitch, &this->_threads_per_atom, &sw_cut,
160                           &sw_epsilon, &sw_sigma, &sw_lambda, &sw_gamma,
161                           &sw_costheta, &sw_cutsq);
162   }
163   this->time_pair.stop();
164 }
165 
// Explicit instantiation of SW for the PRECISION/ACC_PRECISION pair, so the
// member definitions in this file are emitted in this translation unit.
template class SW<PRECISION,ACC_PRECISION>;
167 
168