/***************************************************************************
                                  ufm.cpp
                             -------------------
                   Rodolfo Paula Leite (Unicamp/Brazil)
                    Maurice de Koning (Unicamp/Brazil)

  Class for acceleration of the ufm pair style.

 __________________________________________________________________________
    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
 __________________________________________________________________________

    begin                :
    email                : pl.rodolfo@gmail.com
                           dekoning@ifi.unicamp.br
 ***************************************************************************/

#if defined(USE_OPENCL)
#include "ufm_cl.h"
#elif defined(USE_CUDART)
const char *ufm=0;
#else
#include "ufm_cubin.h"
#endif

#include "lal_ufm.h"
#include <cassert>
namespace LAMMPS_AL {
#define UFMT UFM<numtyp, acctyp>

extern Device<PRECISION,ACC_PRECISION> device;

template <class numtyp, class acctyp>
UFMT::UFM() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
}

template <class numtyp, class acctyp>
UFMT::~UFM() {
  clear();
}

template <class numtyp, class acctyp>
int UFMT::bytes_per_atom(const int max_nbors) const {
  return this->bytes_per_atom_atomic(max_nbors);
}

template <class numtyp, class acctyp>
int UFMT::init(const int ntypes,
               double **host_cutsq, double **host_uf1,
               double **host_uf2, double **host_uf3,
               double **host_offset,
               double *host_special_lj, const int nlocal,
               const int nall, const int max_nbors,
               const int maxspecial, const double cell_size,
               const double gpu_split, FILE *_screen) {
  int success;
  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
                            _screen,ufm,"k_ufm");
  if (success!=0)
    return success;

  // If atom type constants fit in shared memory use fast kernel
  int lj_types=ntypes;
  shared_types=false;
  int max_shared_types=this->device->max_shared_types();
  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
    lj_types=max_shared_types;
    shared_types=true;
  }
  _lj_types=lj_types;

  // Allocate a host write buffer for data initialization
  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
                               UCL_WRITE_ONLY);

  for (int i=0; i<lj_types*lj_types; i++)
    host_write[i]=0.0;

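  // Pack the per-type-pair coefficients into 4-component device vectors:
  // uf1 stores (uf1, uf2, cutsq) and uf3 stores (uf3, uf2, offset), so the
  // kernels can load all constants for a type pair together.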
  uf1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
  this->atom->type_pack4(ntypes,lj_types,uf1,host_write,host_uf1,host_uf2,
                         host_cutsq);

  uf3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
  this->atom->type_pack4(ntypes,lj_types,uf3,host_write,host_uf3,host_uf2,
                         host_offset);

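  // Copy the special_lj scaling factors (applied to 1-2, 1-3, and 1-4
  // bonded neighbors) to the device.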
  UCL_H_Vec<double> dview;
  sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
  dview.view(host_special_lj,4,*(this->ucl_device));
  ucl_copy(sp_lj,dview,false);

  _allocated=true;
  this->_max_bytes=uf1.row_bytes()+uf3.row_bytes()+sp_lj.row_bytes();
  return 0;
}

template <class numtyp, class acctyp>
void UFMT::reinit(const int ntypes, double **host_cutsq, double **host_uf1,
                  double **host_uf2, double **host_uf3, double **host_offset) {
  // Allocate a host write buffer for data initialization
  UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
                               UCL_WRITE_ONLY);

  for (int i=0; i<_lj_types*_lj_types; i++)
    host_write[i]=0.0;

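  // Repack the updated coefficients into the device arrays allocated by
  // init(); this path is taken when pair coefficients change during a run
  // (e.g. via fix adapt).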
  this->atom->type_pack4(ntypes,_lj_types,uf1,host_write,host_uf1,host_uf2,
                         host_cutsq);
  this->atom->type_pack4(ntypes,_lj_types,uf3,host_write,host_uf3,host_uf2,
                         host_offset);
}

template <class numtyp, class acctyp>
void UFMT::clear() {
  if (!_allocated)
    return;
  _allocated=false;

  uf1.clear();
  uf3.clear();
  sp_lj.clear();
  this->clear_atomic();
}

template <class numtyp, class acctyp>
double UFMT::host_memory_usage() const {
  return this->host_memory_usage_atomic()+sizeof(UFM<numtyp,acctyp>);
}

// ---------------------------------------------------------------------------
// Calculate energies, forces, and torques
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int UFMT::loop(const int eflag, const int vflag) {
  // Compute the block size and grid size to keep all cores busy
  const int BX=this->block_size();
  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
                               (BX/this->_threads_per_atom)));
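  // Each block of BX threads processes BX/_threads_per_atom atoms;
  // _threads_per_atom threads cooperate on a single atom's neighbor list.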

  int ainum=this->ans->inum();
  int nbor_pitch=this->nbor->nbor_pitch();
  this->time_pair.start();
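  // When the coefficient tables fit in shared memory, the selected fast
  // kernel (k_pair_sel) is launched; it does not take the type count,
  // since the packed coefficients are held in shared memory instead.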
  if (shared_types) {
    this->k_pair_sel->set_size(GX,BX);
    this->k_pair_sel->run(&this->atom->x, &uf1, &uf3, &sp_lj,
                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                          &this->ans->force, &this->ans->engv, &eflag,
                          &vflag, &ainum, &nbor_pitch,
                          &this->_threads_per_atom);
  } else {
    this->k_pair.set_size(GX,BX);
    this->k_pair.run(&this->atom->x, &uf1, &uf3, &_lj_types, &sp_lj,
                     &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                     &this->ans->force, &this->ans->engv, &eflag, &vflag,
                     &ainum, &nbor_pitch, &this->_threads_per_atom);
  }
  this->time_pair.stop();
  return GX;
}

template class UFM<PRECISION,ACC_PRECISION>;
}