1 // clang-format off 2 /* -*- c++ -*- ------------------------------------------------------------- 3 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator 4 https://www.lammps.org/, Sandia National Laboratories 5 Steve Plimpton, sjplimp@sandia.gov 6 7 Copyright (2003) Sandia Corporation. Under the terms of Contract 8 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains 9 certain rights in this software. This software is distributed under 10 the GNU General Public License. 11 12 See the README file in the top-level LAMMPS directory. 13 ------------------------------------------------------------------------- */ 14 15 /* ---------------------------------------------------------------------- 16 Contributing authors: Christian Trott (SNL), Stan Moore (SNL) 17 ------------------------------------------------------------------------- */ 18 19 #ifndef LMP_SNA_KOKKOS_H 20 #define LMP_SNA_KOKKOS_H 21 22 #include <complex> 23 #include <ctime> 24 #include <Kokkos_Core.hpp> 25 #include "kokkos_type.h" 26 27 #ifdef __SYCL_DEVICE_ONLY__ 28 #include <CL/sycl.hpp> 29 #endif 30 31 namespace LAMMPS_NS { 32 33 template<typename real_type_, int vector_length_> 34 struct WignerWrapper { 35 using real_type = real_type_; 36 using complex = SNAComplex<real_type>; 37 static constexpr int vector_length = vector_length_; 38 39 const int offset; // my offset into the vector (0, ..., vector_length - 1) 40 real_type* buffer; // buffer of real numbers 41 42 KOKKOS_INLINE_FUNCTION WignerWrapperWignerWrapper43 WignerWrapper(complex* buffer_, const int offset_) 44 : offset(offset_), buffer(reinterpret_cast<real_type*>(buffer_)) 45 { ; } 46 47 KOKKOS_INLINE_FUNCTION getWignerWrapper48 complex get(const int& ma) const { 49 return complex(buffer[offset + 2 * vector_length * ma], buffer[offset + vector_length + 2 * vector_length * ma]); 50 } 51 52 KOKKOS_INLINE_FUNCTION setWignerWrapper53 void set(const int& ma, const complex& store) const { 54 buffer[offset + 2 * vector_length * ma] = store.re; 55 buffer[offset + vector_length + 2 * vector_length * ma] = store.im; 56 } 57 }; 58 59 struct alignas(8) FullHalfMapper { 60 int idxu_half; 61 int flip_sign; // 0 -> isn't flipped, 1 -> conj, -1 -> -conj 62 }; 63 64 template<class DeviceType, typename real_type_, int vector_length_> 65 class SNAKokkos { 66 67 public: 68 using real_type = real_type_; 69 using complex = SNAComplex<real_type>; 70 static constexpr int vector_length = vector_length_; 71 72 typedef Kokkos::View<int*, DeviceType> t_sna_1i; 73 typedef Kokkos::View<real_type*, DeviceType> t_sna_1d; 74 typedef Kokkos::View<real_type*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic; 75 typedef Kokkos::View<int**, DeviceType> t_sna_2i; 76 typedef Kokkos::View<real_type**, DeviceType> t_sna_2d; 77 typedef Kokkos::View<real_type**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll; 78 typedef Kokkos::View<real_type***, DeviceType> t_sna_3d; 79 typedef Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll; 80 typedef Kokkos::View<real_type***[3], DeviceType> t_sna_4d; 81 typedef Kokkos::View<real_type****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll; 82 typedef Kokkos::View<real_type**[3], DeviceType> t_sna_3d3; 83 typedef Kokkos::View<real_type*****, DeviceType> t_sna_5d; 84 85 typedef Kokkos::View<complex*, DeviceType> t_sna_1c; 86 typedef Kokkos::View<complex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic; 87 typedef Kokkos::View<complex**, DeviceType> t_sna_2c; 88 typedef Kokkos::View<complex**, Kokkos::LayoutLeft, DeviceType> t_sna_2c_ll; 89 typedef Kokkos::View<complex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_lr; 90 typedef Kokkos::View<complex***, DeviceType> t_sna_3c; 91 typedef Kokkos::View<complex***, Kokkos::LayoutLeft, DeviceType> t_sna_3c_ll; 92 typedef Kokkos::View<complex***[3], DeviceType> t_sna_4c; 93 typedef Kokkos::View<complex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c3_ll; 94 typedef Kokkos::View<complex****, Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll; 95 typedef Kokkos::View<complex**[3], DeviceType> t_sna_3c3; 96 typedef Kokkos::View<complex*****, DeviceType> t_sna_5c; 97 98 inline SNAKokkos()99 SNAKokkos() {}; 100 KOKKOS_INLINE_FUNCTION 101 SNAKokkos(const SNAKokkos<DeviceType,real_type,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); 102 103 inline 104 SNAKokkos(real_type, int, real_type, int, int, int, int, int, int); 105 106 KOKKOS_INLINE_FUNCTION 107 ~SNAKokkos(); 108 109 inline 110 void build_indexlist(); // SNAKokkos() 111 112 inline 113 void init(); // 114 115 double memory_usage(); 116 117 int ncoeff; 118 int host_flag; 119 120 // functions for bispectrum coefficients, GPU only 121 KOKKOS_INLINE_FUNCTION 122 void compute_cayley_klein(const int&, const int&, const int&); 123 KOKKOS_INLINE_FUNCTION 124 void pre_ui(const int&, const int&, const int&, const int&); // ForceSNAP 125 126 // version of the code with parallelism over j_bend 127 KOKKOS_INLINE_FUNCTION 128 void compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); // ForceSNAP 129 // version of the code without parallelism over j_bend 130 KOKKOS_INLINE_FUNCTION 131 void compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); // ForceSNAP 132 133 KOKKOS_INLINE_FUNCTION 134 void compute_zi(const int&, const int&, const int&); // ForceSNAP 135 KOKKOS_INLINE_FUNCTION 136 void compute_yi(int,int,int, 137 const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP 138 KOKKOS_INLINE_FUNCTION 139 void compute_yi_with_zlist(int,int,int, 140 const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP 141 KOKKOS_INLINE_FUNCTION 142 void compute_bi(const int&, const int&, const int&); // ForceSNAP 143 144 // functions for derivatives, GPU only 145 // version of the code with parallelism over j_bend 146 template<int dir> 147 KOKKOS_INLINE_FUNCTION 148 void compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); //ForceSNAP 149 // version of the code without parallelism over j_bend 150 template<int dir> 151 KOKKOS_INLINE_FUNCTION 152 void compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); //ForceSNAP 153 154 // core "evaluation" functions that get plugged into "compute" functions 155 // plugged into compute_ui_small, compute_ui_large 156 KOKKOS_FORCEINLINE_FUNCTION 157 void evaluate_ui_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&, const int&, 158 const int&, const int&, const int&); 159 // plugged into compute_zi, compute_yi 160 KOKKOS_FORCEINLINE_FUNCTION 161 complex evaluate_zi(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&, 162 const int&, const int&, const int&, const int&, const real_type*); 163 // plugged into compute_yi, compute_yi_with_zlist 164 KOKKOS_FORCEINLINE_FUNCTION 165 real_type evaluate_beta_scaled(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&, 166 const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &); 167 // plugged into compute_fused_deidrj_small, compute_fused_deidrj_large 168 KOKKOS_FORCEINLINE_FUNCTION 169 real_type evaluate_duidrj_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&, 170 const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&, 171 const int&, const int&, const int&, const int&); 172 173 // functions for bispectrum coefficients, CPU only 174 KOKKOS_INLINE_FUNCTION 175 void pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP 176 KOKKOS_INLINE_FUNCTION 177 void compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP 178 KOKKOS_INLINE_FUNCTION 179 void compute_zi_cpu(const int&); // ForceSNAP 180 KOKKOS_INLINE_FUNCTION 181 void compute_yi_cpu(int, 182 const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP 183 KOKKOS_INLINE_FUNCTION 184 void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP 185 186 // functions for derivatives, CPU only 187 KOKKOS_INLINE_FUNCTION 188 void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP 189 KOKKOS_INLINE_FUNCTION 190 void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP 191 192 KOKKOS_INLINE_FUNCTION 193 real_type compute_sfac(real_type, real_type); // add_uarraytot, compute_duarray 194 195 KOKKOS_INLINE_FUNCTION 196 real_type compute_dsfac(real_type, real_type); // compute_duarray 197 198 KOKKOS_INLINE_FUNCTION 199 void compute_s_dsfac(const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein 200 201 #ifdef TIMING_INFO 202 double* timers; 203 timespec starttime, endtime; 204 int print; 205 int counter; 206 #endif 207 208 //per sna class instance for OMP use 209 210 // Per InFlight Particle 211 t_sna_3d rij; 212 t_sna_2i inside; 213 t_sna_2d wj; 214 t_sna_2d rcutij; 215 t_sna_2i element; 216 t_sna_3d dedr; 217 int natom, nmax; 218 219 void grow_rij(int, int); 220 221 int twojmax, diagonalstyle; 222 223 t_sna_3d blist; 224 t_sna_3c_ll ulisttot; 225 t_sna_3c_ll ulisttot_full; // un-folded ulisttot, cpu only 226 t_sna_3c_ll zlist; 227 228 t_sna_3c_ll ulist; 229 t_sna_3c_ll ylist; 230 231 // derivatives of data 232 t_sna_4c3_ll dulist; 233 234 // Modified structures for GPU backend 235 t_sna_3c_ll a_pack; // Cayley-Klein `a` 236 t_sna_3c_ll b_pack; // `b` 237 t_sna_4c_ll da_pack; // `da` 238 t_sna_4c_ll db_pack; // `db` 239 t_sna_4d_ll sfac_pack; // sfac, dsfac_{x,y,z} 240 241 t_sna_4d_ll ulisttot_re_pack; // split real, 242 t_sna_4d_ll ulisttot_im_pack; // imag, AoSoA, flattened 243 t_sna_4c_ll ulisttot_pack; // AoSoA layout 244 t_sna_4c_ll zlist_pack; // AoSoA layout 245 t_sna_4d_ll blist_pack; 246 t_sna_4d_ll ylist_pack_re; // split real, 247 t_sna_4d_ll ylist_pack_im; // imag AoSoA layout 248 249 int idxcg_max, idxu_max, idxu_half_max, idxu_cache_max, idxz_max, idxb_max; 250 251 // Chem snap counts 252 int nelements; 253 int ndoubles; 254 int ntriples; 255 256 private: 257 real_type rmin0, rfac0; 258 259 //use indexlist instead of loops, constructor generates these 260 // Same across all SNAKokkos 261 Kokkos::View<int*[10], DeviceType> idxz; 262 Kokkos::View<int*[3], DeviceType> idxb; 263 Kokkos::View<int***, DeviceType> idxcg_block; 264 265 public: 266 Kokkos::View<int*, DeviceType> idxu_block; 267 Kokkos::View<int*, DeviceType> idxu_half_block; 268 Kokkos::View<int*, DeviceType> idxu_cache_block; 269 Kokkos::View<FullHalfMapper*, DeviceType> idxu_full_half; 270 271 private: 272 Kokkos::View<int***, DeviceType> idxz_block; 273 Kokkos::View<int***, DeviceType> idxb_block; 274 275 // data for bispectrum coefficients 276 277 // Same across all SNAKokkos 278 t_sna_1d cglist; 279 t_sna_2d rootpqarray; 280 281 static const int nmaxfactorial = 167; 282 static const double nfac_table[]; 283 inline 284 double factorial(int); 285 286 KOKKOS_INLINE_FUNCTION 287 void create_team_scratch_arrays(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // SNAKokkos() 288 KOKKOS_INLINE_FUNCTION 289 void create_thread_scratch_arrays(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // SNAKokkos() 290 291 inline 292 void init_clebsch_gordan(); // init() 293 294 inline 295 void init_rootpqarray(); // init() 296 297 KOKKOS_INLINE_FUNCTION 298 void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, int); // compute_ui 299 300 KOKKOS_INLINE_FUNCTION 301 void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, 302 const real_type&, const real_type&, const real_type&, 303 const real_type&, const real_type&); // compute_ui_cpu 304 305 306 inline 307 double deltacg(int, int, int); // init_clebsch_gordan 308 309 inline 310 int compute_ncoeff(); // SNAKokkos() 311 KOKKOS_INLINE_FUNCTION 312 void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, 313 const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu 314 const real_type&, const real_type&, const real_type&, const real_type&, const real_type&); 315 316 // Sets the style for the switching function 317 // 0 = none 318 // 1 = cosine 319 int switch_flag; 320 321 // Chem snap flags 322 int chem_flag; 323 int bnorm_flag; 324 325 // Self-weight 326 real_type wself; 327 int wselfall_flag; 328 329 int bzero_flag; // 1 if bzero subtracted from barray 330 Kokkos::View<real_type*, DeviceType> bzero; // array of B values for isolated atoms 331 }; 332 333 } 334 335 #include "sna_kokkos_impl.h" 336 #endif 337 338 /* ERROR/WARNING messages: 339 340 E: Invalid argument to factorial %d 341 342 N must be >= 0 and <= 167, otherwise the factorial result is too 343 large. 344 345 */ 346