1 ////////////////////////////////////////////////////////////////////////////////////// 2 // This file is distributed under the University of Illinois/NCSA Open Source License. 3 // See LICENSE file in top directory for details. 4 // 5 // Copyright (c) 2020 QMCPACK developers. 6 // 7 // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory 8 // 9 // File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory 10 ////////////////////////////////////////////////////////////////////////////////////// 11 12 13 /** @file SplineC2COMPTarget.h 14 * 15 * class to handle complex splines to complex orbitals with splines of arbitrary precision 16 * splines storage and computation is offloaded to accelerators using OpenMP target 17 */ 18 #ifndef QMCPLUSPLUS_SPLINE_C2C_OMPTARGET_H 19 #define QMCPLUSPLUS_SPLINE_C2C_OMPTARGET_H 20 21 #include <memory> 22 #include "QMCWaveFunctions/BsplineFactory/BsplineSet.h" 23 #include "OhmmsSoA/VectorSoaContainer.h" 24 #include "spline2/MultiBspline.hpp" 25 #include "OMPTarget/OMPallocator.hpp" 26 #include "Platforms/PinnedAllocator.h" 27 #include "Utilities/FairDivide.h" 28 #include "Utilities/TimerManager.h" 29 #include "SplineOMPTargetMultiWalkerMem.h" 30 31 namespace qmcplusplus 32 { 33 /** class to match std::complex<ST> spline with BsplineSet::ValueType (complex) SPOs 34 * @tparam ST precision of spline 35 * 36 * Requires temporage storage and multiplication of phase vectors 37 * Internal storage use double sized arrays of ST type, aligned and padded. 38 */ 39 template<typename ST> 40 class SplineC2COMPTarget : public BsplineSet 41 { 42 public: 43 template<typename DT> 44 using OffloadAllocator = OMPallocator<DT, aligned_allocator<DT>>; 45 template<typename DT> 46 using OffloadPinnedAllocator = OMPallocator<DT, PinnedAlignedAllocator<DT>>; 47 48 using SplineType = typename bspline_traits<ST, 3>::SplineType; 49 using BCType = typename bspline_traits<ST, 3>::BCType; 50 using DataType = ST; 51 using PointType = TinyVector<ST, 3>; 52 using SingleSplineType = UBspline_3d_d; 53 // types for evaluation results 54 using ComplexT = typename BsplineSet::ValueType; 55 using BsplineSet::GGGVector_t; 56 using BsplineSet::GradVector_t; 57 using BsplineSet::HessVector_t; 58 using BsplineSet::ValueVector_t; 59 60 using vContainer_type = Vector<ST, aligned_allocator<ST>>; 61 using gContainer_type = VectorSoaContainer<ST, 3>; 62 using hContainer_type = VectorSoaContainer<ST, 6>; 63 using ghContainer_type = VectorSoaContainer<ST, 10>; 64 65 template<typename DT> 66 using OffloadVector = Vector<DT, OffloadAllocator<DT>>; 67 template<typename DT> 68 using OffloadPosVector = VectorSoaContainer<DT, 3, OffloadAllocator<DT>>; 69 70 private: 71 /// timer for offload portion 72 NewTimer& offload_timer_; 73 ///primitive cell 74 CrystalLattice<ST, 3> PrimLattice; 75 ///\f$GGt=G^t G \f$, transformation for tensor in LatticeUnit to CartesianUnit, e.g. Hessian 76 Tensor<ST, 3> GGt; 77 ///multi bspline set 78 std::shared_ptr<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>> SplineInst; 79 80 std::shared_ptr<OffloadVector<ST>> mKK; 81 std::shared_ptr<OffloadPosVector<ST>> myKcart; 82 std::shared_ptr<OffloadVector<ST>> GGt_offload; 83 std::shared_ptr<OffloadVector<ST>> PrimLattice_G_offload; 84 85 std::unique_ptr<SplineOMPTargetMultiWalkerMem<ST, ComplexT>> mw_mem_; 86 87 ///team private ratios for reduction, numVP x numTeams 88 Matrix<ComplexT, OffloadPinnedAllocator<ComplexT>> ratios_private; 89 ///offload scratch space, dynamically resized to the maximal need 90 Vector<ST, OffloadPinnedAllocator<ST>> offload_scratch; 91 ///result scratch space, dynamically resized to the maximal need 92 Vector<ComplexT, OffloadPinnedAllocator<ComplexT>> results_scratch; 93 ///psiinv and position scratch space, used to avoid allocation on the fly and faster transfer 94 Vector<ComplexT, OffloadPinnedAllocator<ComplexT>> psiinv_pos_copy; 95 ///position scratch space, used to avoid allocation on the fly and faster transfer 96 Vector<ST, OffloadPinnedAllocator<ST>> multi_pos_copy; 97 98 void evaluateVGLMultiPos(const Vector<ST, OffloadPinnedAllocator<ST>>& multi_pos_copy, 99 Vector<ST, OffloadPinnedAllocator<ST>>& offload_scratch, 100 Vector<ComplexT, OffloadPinnedAllocator<ComplexT>>& results_scratch, 101 const RefVector<ValueVector_t>& psi_v_list, 102 const RefVector<GradVector_t>& dpsi_v_list, 103 const RefVector<ValueVector_t>& d2psi_v_list) const; 104 105 protected: 106 /// intermediate result vectors 107 vContainer_type myV; 108 vContainer_type myL; 109 gContainer_type myG; 110 hContainer_type myH; 111 ghContainer_type mygH; 112 113 public: SplineC2COMPTarget()114 SplineC2COMPTarget() 115 : BsplineSet(true), 116 offload_timer_(*timer_manager.createTimer("SplineC2COMPTarget::offload", timer_level_fine)), 117 GGt_offload(std::make_shared<OffloadVector<ST>>(9)), 118 PrimLattice_G_offload(std::make_shared<OffloadVector<ST>>(9)) 119 { 120 is_complex = true; 121 className = "SplineC2COMPTarget"; 122 KeyWord = "SplineC2C"; 123 } 124 SplineC2COMPTarget(const SplineC2COMPTarget & in)125 SplineC2COMPTarget(const SplineC2COMPTarget& in) 126 : BsplineSet(in), 127 offload_timer_(in.offload_timer_), 128 PrimLattice(in.PrimLattice), 129 GGt(in.GGt), 130 SplineInst(in.SplineInst), 131 mKK(in.mKK), 132 myKcart(in.myKcart), 133 GGt_offload(in.GGt_offload), 134 PrimLattice_G_offload(in.PrimLattice_G_offload), 135 myV(in.myV), 136 myL(in.myL), 137 myG(in.myG), 138 myH(in.myH), 139 mygH(in.mygH) 140 {} 141 createResource(ResourceCollection & collection)142 void createResource(ResourceCollection& collection) const override 143 { 144 auto resource_index = collection.addResource(std::make_unique<SplineOMPTargetMultiWalkerMem<ST, ComplexT>>()); 145 } 146 acquireResource(ResourceCollection & collection)147 void acquireResource(ResourceCollection& collection) override 148 { 149 auto res_ptr = dynamic_cast<SplineOMPTargetMultiWalkerMem<ST, ComplexT>*>(collection.lendResource().release()); 150 if (!res_ptr) 151 throw std::runtime_error("SplineC2COMPTarget::acquireResource dynamic_cast failed"); 152 mw_mem_.reset(res_ptr); 153 } 154 releaseResource(ResourceCollection & collection)155 void releaseResource(ResourceCollection& collection) override { collection.takebackResource(std::move(mw_mem_)); } 156 makeClone()157 virtual SPOSet* makeClone() const override { return new SplineC2COMPTarget(*this); } 158 resizeStorage(size_t n,size_t nvals)159 inline void resizeStorage(size_t n, size_t nvals) 160 { 161 init_base(n); 162 size_t npad = getAlignedSize<ST>(2 * n); 163 myV.resize(npad); 164 myG.resize(npad); 165 myL.resize(npad); 166 myH.resize(npad); 167 mygH.resize(npad); 168 } 169 bcast_tables(Communicate * comm)170 void bcast_tables(Communicate* comm) { chunked_bcast(comm, SplineInst->getSplinePtr()); } 171 gather_tables(Communicate * comm)172 void gather_tables(Communicate* comm) 173 { 174 if (comm->size() == 1) 175 return; 176 const int Nbands = kPoints.size(); 177 const int Nbandgroups = comm->size(); 178 offset.resize(Nbandgroups + 1, 0); 179 FairDivideLow(Nbands, Nbandgroups, offset); 180 181 for (size_t ib = 0; ib < offset.size(); ib++) 182 offset[ib] *= 2; 183 gatherv(comm, SplineInst->getSplinePtr(), SplineInst->getSplinePtr()->z_stride, offset); 184 } 185 186 template<typename GT, typename BCT> create_spline(GT & xyz_g,BCT & xyz_bc)187 void create_spline(GT& xyz_g, BCT& xyz_bc) 188 { 189 resize_kpoints(); 190 SplineInst = std::make_shared<MultiBspline<ST, OffloadAllocator<ST>, OffloadAllocator<SplineType>>>(); 191 SplineInst->create(xyz_g, xyz_bc, myV.size()); 192 193 app_log() << "MEMORY " << SplineInst->sizeInByte() / (1 << 20) << " MB allocated " 194 << "for the coefficients in 3D spline orbital representation" << std::endl; 195 } 196 197 /// this routine can not be called from threaded region finalizeConstruction()198 void finalizeConstruction() override 199 { 200 // map the SplineInst->getSplinePtr() structure to GPU 201 auto* MultiSpline = SplineInst->getSplinePtr(); 202 auto* restrict coefs = MultiSpline->coefs; 203 // attach pointers on the device to achieve deep copy 204 PRAGMA_OFFLOAD("omp target map(always, to: MultiSpline[0:1], coefs[0:MultiSpline->coefs_size])") 205 { 206 MultiSpline->coefs = coefs; 207 } 208 209 // transfer static data to GPU 210 auto* mKK_ptr = mKK->data(); 211 PRAGMA_OFFLOAD("omp target update to(mKK_ptr[0:mKK->size()])") 212 auto* myKcart_ptr = myKcart->data(); 213 PRAGMA_OFFLOAD("omp target update to(myKcart_ptr[0:myKcart->capacity()*3])") 214 for (size_t i = 0; i < 9; i++) 215 { 216 (*GGt_offload)[i] = GGt[i]; 217 (*PrimLattice_G_offload)[i] = PrimLattice.G[i]; 218 } 219 auto* PrimLattice_G_ptr = PrimLattice_G_offload->data(); 220 PRAGMA_OFFLOAD("omp target update to(PrimLattice_G_ptr[0:9])") 221 auto* GGt_ptr = GGt_offload->data(); 222 PRAGMA_OFFLOAD("omp target update to(GGt_ptr[0:9])") 223 } 224 flush_zero()225 inline void flush_zero() { SplineInst->flush_zero(); } 226 227 /** remap kPoints to pack the double copy */ resize_kpoints()228 inline void resize_kpoints() 229 { 230 const size_t nk = kPoints.size(); 231 mKK = std::make_shared<OffloadVector<ST>>(nk); 232 myKcart = std::make_shared<OffloadPosVector<ST>>(nk); 233 for (size_t i = 0; i < nk; ++i) 234 { 235 (*mKK)[i] = -dot(kPoints[i], kPoints[i]); 236 (*myKcart)(i) = kPoints[i]; 237 } 238 } 239 240 void set_spline(SingleSplineType* spline_r, SingleSplineType* spline_i, int twist, int ispline, int level); 241 242 bool read_splines(hdf_archive& h5f); 243 244 bool write_splines(hdf_archive& h5f); 245 246 void assign_v(const PointType& r, const vContainer_type& myV, ValueVector_t& psi, int first, int last) const; 247 248 virtual void evaluateValue(const ParticleSet& P, const int iat, ValueVector_t& psi) override; 249 250 virtual void evaluateDetRatios(const VirtualParticleSet& VP, 251 ValueVector_t& psi, 252 const ValueVector_t& psiinv, 253 std::vector<ValueType>& ratios) override; 254 255 virtual void mw_evaluateDetRatios(const RefVectorWithLeader<SPOSet>& spo_list, 256 const RefVector<const VirtualParticleSet>& vp_list, 257 const RefVector<ValueVector_t>& psi_list, 258 const std::vector<const ValueType*>& invRow_ptr_list, 259 std::vector<std::vector<ValueType>>& ratios_list) const override; 260 261 /** assign_vgl_from_l can be used when myL is precomputed and myV,myG,myL in cartesian 262 */ 263 void assign_vgl_from_l(const PointType& r, ValueVector_t& psi, GradVector_t& dpsi, ValueVector_t& d2psi); 264 265 virtual void evaluateVGL(const ParticleSet& P, 266 const int iat, 267 ValueVector_t& psi, 268 GradVector_t& dpsi, 269 ValueVector_t& d2psi) override; 270 271 virtual void mw_evaluateVGL(const RefVectorWithLeader<SPOSet>& sa_list, 272 const RefVectorWithLeader<ParticleSet>& P_list, 273 int iat, 274 const RefVector<ValueVector_t>& psi_v_list, 275 const RefVector<GradVector_t>& dpsi_v_list, 276 const RefVector<ValueVector_t>& d2psi_v_list) const override; 277 278 virtual void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSet>& spo_list, 279 const RefVectorWithLeader<ParticleSet>& P_list, 280 int iat, 281 const std::vector<const ValueType*>& invRow_ptr_list, 282 VGLVector_t& phi_vgl_v, 283 std::vector<ValueType>& ratios, 284 std::vector<GradType>& grads) const override; 285 286 void assign_vgh(const PointType& r, 287 ValueVector_t& psi, 288 GradVector_t& dpsi, 289 HessVector_t& grad_grad_psi, 290 int first, 291 int last) const; 292 293 virtual void evaluateVGH(const ParticleSet& P, 294 const int iat, 295 ValueVector_t& psi, 296 GradVector_t& dpsi, 297 HessVector_t& grad_grad_psi) override; 298 299 void assign_vghgh(const PointType& r, 300 ValueVector_t& psi, 301 GradVector_t& dpsi, 302 HessVector_t& grad_grad_psi, 303 GGGVector_t& grad_grad_grad_psi, 304 int first = 0, 305 int last = -1) const; 306 307 virtual void evaluateVGHGH(const ParticleSet& P, 308 const int iat, 309 ValueVector_t& psi, 310 GradVector_t& dpsi, 311 HessVector_t& grad_grad_psi, 312 GGGVector_t& grad_grad_grad_psi) override; 313 314 virtual void evaluate_notranspose(const ParticleSet& P, 315 int first, 316 int last, 317 ValueMatrix_t& logdet, 318 GradMatrix_t& dlogdet, 319 ValueMatrix_t& d2logdet) override; 320 321 template<class BSPLINESPO> 322 friend struct SplineSetReader; 323 friend struct BsplineReaderBase; 324 }; 325 326 extern template class SplineC2COMPTarget<float>; 327 extern template class SplineC2COMPTarget<double>; 328 329 } // namespace qmcplusplus 330 #endif 331