//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2020 QMCPACK developers.
//
// File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
//
// File refactored from: VMC.cpp
//////////////////////////////////////////////////////////////////////////////////////
11
12 #include "VMCBatched.h"
13 #include "Concurrency/ParallelExecutor.hpp"
14 #include "Concurrency/Info.hpp"
15 #include "Message/UniformCommunicateError.h"
16 #include "Message/CommOperators.h"
17 #include "Utilities/RunTimeManager.h"
18 #include "ParticleBase/RandomSeqGenerator.h"
19 #include "Particle/MCSample.h"
20 #include "MemoryUsage.h"
21
22 namespace qmcplusplus
23 {
24 /** Constructor maintains proper ownership of input parameters
25 */
VMCBatched(const ProjectData & project_data,QMCDriverInput && qmcdriver_input,VMCDriverInput && input,MCPopulation && pop,SampleStack & samples,Communicate * comm)26 VMCBatched::VMCBatched(const ProjectData& project_data,
27 QMCDriverInput&& qmcdriver_input,
28 VMCDriverInput&& input,
29 MCPopulation&& pop,
30 SampleStack& samples,
31 Communicate* comm)
32 : QMCDriverNew(project_data, std::move(qmcdriver_input), std::move(pop), "VMCBatched::", comm, "VMCBatched"),
33 vmcdriver_input_(input),
34 samples_(samples),
35 collect_samples_(false)
36 {}
37
/** Advance every walker of a crowd by one full VMC step.
 *
 *  For each sub step and each particle group ("species"), each particle of
 *  every walker gets a proposed particle-by-particle move (with or without
 *  drift) which is accepted or rejected per walker via the Metropolis test.
 *  Afterwards log(psi), gradients and laplacians are refreshed, the
 *  Hamiltonians are evaluated, and the resulting properties are written back
 *  into the walkers.
 *
 *  \param sft           shared read-only driver state (inputs, population, drift modifier)
 *  \param crowd         the walkers (with their particle sets, wave functions, hamiltonians)
 *  \param timers        driver timers; pbyp/buffer/hamiltonian/collectables phases are timed
 *  \param step_context  per-crowd random generator and preallocated gaussian deltas
 *  \param recompute     forwarded to flex_evaluateGL to request a full recompute
 */
void VMCBatched::advanceWalkers(const StateForThread& sft,
                                Crowd& crowd,
                                QMCDriverNew::DriverTimers& timers,
                                ContextForSteps& step_context,
                                bool recompute)
{
  // Nothing to do for an empty crowd (possible with uneven walker distributions).
  if (crowd.size() == 0)
    return;
  auto& ps_dispatcher = crowd.dispatchers_.ps_dispatcher_;
  auto& twf_dispatcher = crowd.dispatchers_.twf_dispatcher_;
  auto& ham_dispatcher = crowd.dispatchers_.ham_dispatcher_;
  auto& walkers = crowd.get_walkers();
  const RefVectorWithLeader<ParticleSet> walker_elecs(crowd.get_walker_elecs()[0], crowd.get_walker_elecs());
  const RefVectorWithLeader<TrialWaveFunction> walker_twfs(crowd.get_walker_twfs()[0], crowd.get_walker_twfs());

  // Acquire the crowd-shared multi-walker resources for the duration of this step.
  ResourceCollectionTeamLock<ParticleSet> pset_res_lock(crowd.getSharedResource().pset_res, walker_elecs);
  DriverWalkerResourceCollectionLock pbyp_lock(crowd.getSharedResource(), crowd.get_walker_twfs()[0],
                                               crowd.get_walker_hamiltonians()[0]);

  // Debug-build consistency check of cached log(psi) and G/L values.
  assert(QMCDriverNew::checkLogAndGL(crowd));

  timers.movepbyp_timer.start();
  const int num_walkers = crowd.size();
  // Note std::vector<bool> is not like the rest of stl.
  std::vector<bool> moved(num_walkers, false);
  constexpr RealType mhalf(-0.5);
  const bool use_drift = sft.vmcdrv_input.get_use_drift();
  // Per-walker scratch vectors, sized once per call.
  std::vector<TrialWaveFunction::GradType> grads_now(num_walkers);
  std::vector<TrialWaveFunction::GradType> grads_new(num_walkers);
  std::vector<TrialWaveFunction::PsiValueType> ratios(num_walkers);

  std::vector<PosType> drifts(num_walkers);
  std::vector<RealType> log_gf(num_walkers);
  std::vector<RealType> log_gb(num_walkers);
  std::vector<RealType> prob(num_walkers);

  // local list to handle accept/reject
  std::vector<bool> isAccepted;
  std::vector<std::reference_wrapper<TrialWaveFunction>> twf_accept_list, twf_reject_list;
  isAccepted.reserve(num_walkers);

  for (int sub_step = 0; sub_step < sft.qmcdrv_input.get_sub_steps(); sub_step++)
  {
    //This generates an entire steps worth of deltas.
    step_context.nextDeltaRs(num_walkers * sft.population.get_num_particles());

    // up and down electrons are "species" within qmcpack
    for (int ig = 0; ig < step_context.get_num_groups(); ++ig) //loop over species
    {
      // Time step is scaled by the inverse mass of this particle group.
      RealType tauovermass = sft.qmcdrv_input.get_tau() * sft.population.get_ptclgrp_inv_mass()[ig];
      RealType oneover2tau = 0.5 / (tauovermass);
      RealType sqrttau = std::sqrt(tauovermass);

      twf_dispatcher.flex_prepareGroup(walker_twfs, walker_elecs, ig);

      int start_index = step_context.getPtclGroupStart(ig);
      int end_index = step_context.getPtclGroupEnd(ig);
      for (int iat = start_index; iat < end_index; ++iat)
      {
        // step_context.deltaRsBegin returns an iterator to a flat series of PosTypes
        // fastest in walkers then particles
        auto delta_r_start = step_context.deltaRsBegin() + iat * num_walkers;
        auto delta_r_end = delta_r_start + num_walkers;

        if (use_drift)
        {
          // Drift-diffusion proposal: drift from the current gradient plus a gaussian kick.
          twf_dispatcher.flex_evalGrad(walker_twfs, walker_elecs, iat, grads_now);
          sft.drift_modifier.getDrifts(tauovermass, grads_now, drifts);

          std::transform(drifts.begin(), drifts.end(), delta_r_start, drifts.begin(),
                         [sqrttau](const PosType& drift, const PosType& delta_r) {
                           return drift + (sqrttau * delta_r);
                         });
        }
        else
        {
          // Pure diffusion proposal: gaussian kick only.
          std::transform(delta_r_start, delta_r_end, drifts.begin(),
                         [sqrttau](const PosType& delta_r) { return sqrttau * delta_r; });
        }

        ps_dispatcher.flex_makeMove(walker_elecs, iat, drifts);

        // This is inelegant
        if (use_drift)
        {
          // With drift the acceptance needs the forward/backward green function ratio.
          twf_dispatcher.flex_calcRatioGrad(walker_twfs, walker_elecs, iat, ratios, grads_new);
          // log of the forward move gaussian: -1/2 |delta_r|^2  (mhalf is constexpr, no capture needed)
          std::transform(delta_r_start, delta_r_end, log_gf.begin(),
                         [](const PosType& delta_r) { return mhalf * dot(delta_r, delta_r); });

          sft.drift_modifier.getDrifts(tauovermass, grads_new, drifts);

          // Reverse displacement: old position minus proposed position minus new drift.
          std::transform(crowd.beginElectrons(), crowd.endElectrons(), drifts.begin(), drifts.begin(),
                         [iat](const ParticleSet& elecs, const PosType& drift) {
                           return elecs.R[iat] - elecs.activePos - drift;
                         });

          // log of the backward move gaussian.
          std::transform(drifts.begin(), drifts.end(), log_gb.begin(),
                         [oneover2tau](const PosType& drift) { return -oneover2tau * dot(drift, drift); });
        }
        else
        {
          twf_dispatcher.flex_calcRatio(walker_twfs, walker_elecs, iat, ratios);
        }

        // |psi_new/psi_old|^2 for each walker.
        std::transform(ratios.begin(), ratios.end(), prob.begin(), [](auto ratio) { return std::norm(ratio); });

        isAccepted.clear();

        // Metropolis accept/reject per walker; tiny probabilities are rejected outright.
        for (int i_accept = 0; i_accept < num_walkers; ++i_accept)
          if (prob[i_accept] >= std::numeric_limits<RealType>::epsilon() &&
              step_context.get_random_gen()() < prob[i_accept] * std::exp(log_gb[i_accept] - log_gf[i_accept]))
          {
            crowd.incAccept();
            isAccepted.push_back(true);
          }
          else
          {
            crowd.incReject();
            isAccepted.push_back(false);
          }

        twf_dispatcher.flex_accept_rejectMove(walker_twfs, walker_elecs, iat, isAccepted, true);

        ps_dispatcher.flex_accept_rejectMove(walker_elecs, iat, isAccepted);
      }
    }
    twf_dispatcher.flex_completeUpdates(walker_twfs);
  }

  ps_dispatcher.flex_donePbyP(walker_elecs);
  timers.movepbyp_timer.stop();

  timers.buffer_timer.start();
  // Refresh log(psi), gradients and laplacians after the full sweep.
  twf_dispatcher.flex_evaluateGL(walker_twfs, walker_elecs, recompute);

  assert(QMCDriverNew::checkLogAndGL(crowd));
  timers.buffer_timer.stop();

  timers.hamiltonian_timer.start();
  const RefVectorWithLeader<QMCHamiltonian> walker_hamiltonians(crowd.get_walker_hamiltonians()[0],
                                                                crowd.get_walker_hamiltonians());
  std::vector<QMCHamiltonian::FullPrecRealType> local_energies(
      ham_dispatcher.flex_evaluate(walker_hamiltonians, walker_elecs));
  timers.hamiltonian_timer.stop();

  // Push the fresh log(psi), phase and local energy into each walker's properties.
  auto resetSigNLocalEnergy = [](MCPWalker& walker, TrialWaveFunction& twf, auto& local_energy) {
    walker.resetProperty(twf.getLogPsi(), twf.getPhase(), local_energy);
  };
  for (int iw = 0; iw < crowd.size(); ++iw)
    resetSigNLocalEnergy(walkers[iw], walker_twfs[iw], local_energies[iw]);

  // moved to be consistent with DMC
  timers.collectables_timer.start();
  auto evaluateNonPhysicalHamiltonianElements = [](QMCHamiltonian& ham, ParticleSet& pset, MCPWalker& walker) {
    ham.auxHevaluate(pset, walker);
  };
  for (int iw = 0; iw < crowd.size(); ++iw)
    evaluateNonPhysicalHamiltonianElements(walker_hamiltonians[iw], walker_elecs[iw], walkers[iw]);

  auto savePropertiesIntoWalker = [](QMCHamiltonian& ham, MCPWalker& walker) {
    ham.saveProperty(walker.getPropertyBase());
  };
  for (int iw = 0; iw < crowd.size(); ++iw)
    savePropertiesIntoWalker(walker_hamiltonians[iw], walkers[iw]);
  timers.collectables_timer.stop();
  // TODO:
  // check if all moves failed
}
206
207
208 /** Thread body for VMC step
209 *
210 */
runVMCStep(int crowd_id,const StateForThread & sft,DriverTimers & timers,std::vector<std::unique_ptr<ContextForSteps>> & context_for_steps,std::vector<std::unique_ptr<Crowd>> & crowds)211 void VMCBatched::runVMCStep(int crowd_id,
212 const StateForThread& sft,
213 DriverTimers& timers,
214 std::vector<std::unique_ptr<ContextForSteps>>& context_for_steps,
215 std::vector<std::unique_ptr<Crowd>>& crowds)
216 {
217 Crowd& crowd = *(crowds[crowd_id]);
218
219 crowd.setRNGForHamiltonian(context_for_steps[crowd_id]->get_random_gen());
220
221 int max_steps = sft.qmcdrv_input.get_max_steps();
222 // \todo delete
223 RealType cnorm = 1.0 / static_cast<RealType>(crowd.size());
224 IndexType step = sft.step;
225 // Are we entering the the last step of a block to recompute at?
226 bool recompute_this_step = (sft.is_recomputing_block && (step + 1) == max_steps);
227 advanceWalkers(sft, crowd, timers, *context_for_steps[crowd_id], recompute_this_step);
228 crowd.accumulate();
229 }
230
process(xmlNodePtr node)231 void VMCBatched::process(xmlNodePtr node)
232 {
233 print_mem("VMCBatched before initialization", app_log());
234 // \todo get total walkers should be coming from VMCDriverInpu
235 try
236 {
237 QMCDriverNew::AdjustedWalkerCounts awc =
238 adjustGlobalWalkerCount(myComm->size(), myComm->rank(), qmcdriver_input_.get_total_walkers(),
239 qmcdriver_input_.get_walkers_per_rank(), 1.0, qmcdriver_input_.get_num_crowds());
240
241 Base::startup(node, awc);
242 }
243 catch (const UniformCommunicateError& ue)
244 {
245 myComm->barrier_and_abort(ue.what());
246 }
247 }
248
compute_samples_per_rank(const QMCDriverInput & qmcdriver_input,const IndexType local_walkers)249 int VMCBatched::compute_samples_per_rank(const QMCDriverInput& qmcdriver_input, const IndexType local_walkers)
250 {
251 int nblocks = qmcdriver_input.get_max_blocks();
252 int nsteps = qmcdriver_input.get_max_steps();
253 return nblocks * nsteps * local_walkers;
254 }
255
256
/** Runs the actual VMC section
 *
 * Dependent on base class state machine
 * Assumes state already updated from the following calls:
 * 1. QMCDriverNew::setStatus
 * 2. QMCDriverNew::putWalkers
 * 3. QMCDriverNew::process
 *
 * At the moment I don't care about 1st touch, prove it matters
 * If does consider giving more to the thread by value that should
 * end up thread local. (I think)
 *
 * \return result of finalize() after num_blocks blocks (or early stop on time limit)
 */
bool VMCBatched::run()
{
  IndexType num_blocks = qmcdriver_input_.get_max_blocks();
  //start the main estimator
  estimator_manager_->startDriverRun();

  // Read-only state shared by all crowd tasks for the whole run.
  StateForThread vmc_state(qmcdriver_input_, vmcdriver_input_, *drift_modifier_, population_);

  LoopTimer<> vmc_loop;
  // Only rank 0 reports; all ranks share the stop decision via bcast below.
  RunTimeControl<> runtimeControl(run_time_manager, project_data_.getMaxCPUSeconds(), project_data_.getTitle(),
                                  myComm->rank() == 0);

  { // walker initialization
    ScopedTimer local_timer(timers_.init_walkers_timer);
    ParallelExecutor<> section_start_task;
    section_start_task(crowds_.size(), initialLogEvaluation, std::ref(crowds_), std::ref(step_contexts_));
  }

  print_mem("VMCBatched after initialLogEvaluation", app_summary());

  ParallelExecutor<> crowd_task;

  if (qmcdriver_input_.get_warmup_steps() > 0)
  {
    // Run warm-up steps: advance walkers but do not accumulate estimator data.
    auto runWarmupStep = [](int crowd_id, StateForThread& sft, DriverTimers& timers,
                            UPtrVector<ContextForSteps>& context_for_steps, UPtrVector<Crowd>& crowds) {
      Crowd& crowd = *(crowds[crowd_id]);
      advanceWalkers(sft, crowd, timers, *context_for_steps[crowd_id], false);
    };

    for (int step = 0; step < qmcdriver_input_.get_warmup_steps(); ++step)
    {
      ScopedTimer local_timer(timers_.run_steps_timer);
      crowd_task(crowds_.size(), runWarmupStep, vmc_state, std::ref(timers_), std::ref(step_contexts_),
                 std::ref(crowds_));
    }

    app_log() << "Warm-up is completed!" << std::endl;
    print_mem("VMCBatched after Warmup", app_log());
  }

  // Main production loop over blocks of steps.
  for (int block = 0; block < num_blocks; ++block)
  {
    vmc_loop.start();
    vmc_state.recalculate_properties_period =
        (qmc_driver_mode_[QMC_UPDATE_MODE]) ? qmcdriver_input_.get_recalculate_properties_period() : 0;
    // Recompute on the last step of every blocks_between_recompute'th block (0 disables).
    vmc_state.is_recomputing_block = qmcdriver_input_.get_blocks_between_recompute()
        ? (1 + block) % qmcdriver_input_.get_blocks_between_recompute() == 0
        : false;

    estimator_manager_->startBlock(qmcdriver_input_.get_max_steps());

    for (auto& crowd : crowds_)
      crowd->startBlock(qmcdriver_input_.get_max_steps());
    for (int step = 0; step < qmcdriver_input_.get_max_steps(); ++step)
    {
      ScopedTimer local_timer(timers_.run_steps_timer);
      vmc_state.step = step;
      crowd_task(crowds_.size(), runVMCStep, vmc_state, timers_, std::ref(step_contexts_), std::ref(crowds_));

      // When sample collection is enabled, snapshot every walker configuration each step.
      if (collect_samples_)
      {
        const auto& elec_psets = population_.get_elec_particle_sets();
        for (const auto& walker : elec_psets)
        {
          samples_.appendSample(MCSample(*walker));
        }
      }
    }
    print_mem("VMCBatched after a block", app_debug_stream());
    endBlock();
    vmc_loop.stop();

    bool stop_requested = false;
    // Rank 0 decides whether the time limit was reached
    if (!myComm->rank())
      stop_requested = runtimeControl.checkStop(vmc_loop);
    myComm->bcast(stop_requested);

    if (stop_requested)
    {
      if (!myComm->rank())
        app_log() << runtimeControl.generateStopMessage("VMCBatched", block);
      run_time_manager.markStop();
      break;
    }
  }
  // This is confusing logic from VMC.cpp want this functionality write documentation of this
  // and clean it up
  // bool wrotesamples = qmcdriver_input_.get_dump_config();
  // if (qmcdriver_input_.get_dump_config())
  // {
  //wrotesamples = W.dumpEnsemble(wClones, wOut, myComm->size(), nBlocks);
  //if (wrotesamples)
  //  app_log() << "  samples are written to the config.h5" << std::endl;
  // }

  // second argument was !wrotesample so if W.dumpEnsemble returns false or
  // dump_config is false from input then dump_walkers
  {
    // End-of-run summary: approximate energy and variance from the estimator manager.
    std::ostringstream o;
    FullPrecRealType ene, var;
    estimator_manager_->getApproximateEnergyVariance(ene, var);
    o << "====================================================";
    o << "\n  End of a VMC block";
    o << "\n    QMC counter        = " << project_data_.getSeriesIndex();
    o << "\n    time step          = " << qmcdriver_input_.get_tau();
    o << "\n    reference energy   = " << ene;
    o << "\n    reference variance = " << var;
    o << "\n====================================================";
    app_log() << o.str() << std::endl;
  }

  print_mem("VMCBatched ends", app_log());

  estimator_manager_->stopDriverRun();

  return finalize(num_blocks, true);
}
389
enable_sample_collection()390 void VMCBatched::enable_sample_collection()
391 {
392 int samples = compute_samples_per_rank(qmcdriver_input_, population_.get_num_local_walkers());
393 samples_.setMaxSamples(samples);
394 collect_samples_ = true;
395
396 int total_samples = samples * population_.get_num_ranks();
397 app_log() << "VMCBatched Driver collecting samples, samples per rank = " << samples << '\n';
398 app_log() << " total samples = " << total_samples << '\n';
399 }
400
401 } // namespace qmcplusplus
402