1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2020 QMCPACK developers.
6 //
7 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
8 // Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
9 //
10 // File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
11 //////////////////////////////////////////////////////////////////////////////////////
12
13 #include "catch.hpp"
14 #include "Utilities/RandomGenerator.h"
15 #include "OhmmsData/Libxml2Doc.h"
16 #include "OhmmsPETE/OhmmsMatrix.h"
17 #include "Particle/ParticleSet.h"
18 #include "Particle/MCWalkerConfiguration.h"
19 #include "QMCDrivers/WalkerControlBase.h"
20 #ifdef HAVE_MPI
21 #include "QMCDrivers/DMC/WalkerControlMPI.h"
22 #include "QMCDrivers/DMC/WalkerReconfigurationMPI.h"
23 #endif
24
25 #include <stdio.h>
26 #include <string>
27 #include <random>
28
29 using std::string;
30
31 namespace qmcplusplus
32 {
output_vector(const std::string & name,std::vector<int> & vec)33 void output_vector(const std::string& name, std::vector<int>& vec)
34 {
35 std::cout << name;
36 for (int i = 0; i < vec.size(); i++)
37 {
38 std::cout << vec[i] << " ";
39 }
40 std::cout << std::endl;
41 }
42
43 // uncomment the std::cout and output_vector lines to see the walker assignments
44 TEST_CASE("Walker control assign walkers", "[drivers][walker_control]")
45 {
46 int Cur_pop = 8;
47 int NumContexts = 4;
48 std::vector<int> FairOffset(NumContexts + 1);
49
50 // The in loop copy is necessary to support the assert at the end of the swaps.
51 // This was important for debugging but will go in a future PR as part of cleaning
52 // update determineNewWalkerPopulation
53 std::vector<int> NumPerRank = {4, 4, 0, 0};
54 std::vector<int> NewNum = NumPerRank;
55 for (int me = 0; me < NumContexts; me++)
56 {
57 std::vector<int> minus;
58 std::vector<int> plus;
59 std::vector<int> num_per_rank = NumPerRank;
60
61 //std::cout << "For processor number " << me << std::endl;
62 WalkerControlMPI::determineNewWalkerPopulation(Cur_pop, NumContexts, me, num_per_rank, FairOffset, minus, plus);
63
64 REQUIRE(minus.size() == plus.size());
65 //output_vector(" Minus: ", minus);
66
67 //output_vector(" Plus: ", plus);
68
69 for (int i = 0; i < plus.size(); i++)
70 {
71 if (me == plus[i])
72 NewNum[plus[i]]--;
73 }
74 for (int i = 0; i < minus.size(); i++)
75 {
76 if (me == minus[i])
77 NewNum[minus[i]]++;
78 }
79 }
80 //output_vector("New num per node: ", NewNum);
81
82 for (int i = 0; i < NewNum.size(); i++)
83 {
84 int num = FairOffset[i + 1] - FairOffset[i];
85 REQUIRE(NewNum[i] == num);
86 }
87 }
88
89 TEST_CASE("WalkerControl round trip index conversions", "[drivers][walker_control]")
90 {
91 std::vector<int> walker_counts; //= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 23, 37};
92 for (int i = 0; i < 1000; ++i)
93 walker_counts.push_back(i);
94 for (int i = 0; i < 1000000; i += 1000)
95 walker_counts.push_back(i);
96 for (int i = 0; i < 100000000; i += 100000)
97 walker_counts.push_back(i);
98
99 std::vector<QMCTraits::FullPrecRealType> fp_counts(walker_counts.size());
100 std::vector<int> walker_count_results(walker_counts.size());
101 for (int iw = 0; iw < walker_counts.size(); ++iw)
102 {
103 fp_counts[iw] = walker_counts[iw];
104 }
105 for (int iw = 0; iw < fp_counts.size(); ++iw)
106 {
107 walker_count_results[iw] = static_cast<int>(fp_counts[iw]);
108 }
109 bool all_pass = true;
110 for (int iw = 0; iw < walker_counts.size(); ++iw)
111 {
112 all_pass &= (walker_counts[iw] == walker_count_results[iw]);
113 }
114 REQUIRE(all_pass);
115 }
116
117 // uncomment the std::cout and output_vector lines to see the walker assignments
118 TEST_CASE("Walker control assign walkers odd ranks", "[drivers][walker_control]")
119 {
120 int Cur_pop = 9;
121 int NumContexts = 3;
122 std::vector<int> NumPerRank = {5, 2, 2};
123 std::vector<int> FairOffset(NumContexts + 1);
124
125 std::vector<int> NewNum = NumPerRank;
126 for (int me = 0; me < NumContexts; me++)
127 {
128 std::vector<int> minus;
129 std::vector<int> plus;
130 std::vector<int> num_per_rank = NumPerRank;
131 //std::cout << "For processor number " << me << std::endl;
132 WalkerControlMPI::determineNewWalkerPopulation(Cur_pop, NumContexts, me, num_per_rank, FairOffset, minus, plus);
133
134 REQUIRE(minus.size() == plus.size());
135 //output_vector(" Minus: ", minus);
136
137 //output_vector(" Plus: ", plus);
138
139 for (int i = 0; i < plus.size(); i++)
140 {
141 if (me == plus[i])
142 NewNum[plus[i]]--;
143 }
144 for (int i = 0; i < minus.size(); i++)
145 {
146 if (me == minus[i])
147 NewNum[minus[i]]++;
148 }
149 }
150 //output_vector("New num per node: ", NewNum);
151
152 for (int i = 0; i < NewNum.size(); i++)
153 {
154 int num = FairOffset[i + 1] - FairOffset[i];
155 REQUIRE(NewNum[i] == num);
156 }
157 }
158
159 #ifdef PROPERTY_TESTING
160 // Eventually will create some way to build and run property-based tests, which
161 // are tests that use random inputs and verify that certain properties hold true.
162 //
163 // In this case, the test constructs random number of processors with a random
164 // number of walkers on each node. The test checks two properties - that the
165 // plus and minus lists have the same size, and that each node ultimately has
166 // the number of walkers chosen by FairDivideLow.
167
168 TEST_CASE("Walker control assign walkers many", "[drivers][walker_control][property]")
169 {
170 // Use random device for seed for coverage
171 //std::random_device rd;
172 //std::mt19937 mt(rd());
173 // Use fixed seed for reproducibility
174 std::mt19937 mt(100);
175 std::uniform_int_distribution<int> NumNodes(1, 1000);
176 std::uniform_int_distribution<int> TotalPop(0, 1000);
177
178 for (int nt = 0; nt < 1000; nt++)
179 {
180 int NumContexts = NumNodes(mt);
181 int Cur_pop = TotalPop(mt);
182 std::uniform_int_distribution<int> WalkerPop(0, 2 * Cur_pop / NumContexts);
183 std::vector<int> NumPerRank(NumContexts);
184 int current_pop = Cur_pop;
185 for (int i = 0; i < NumContexts; i++)
186 {
187 int p = WalkerPop(mt);
188 p = std::min(current_pop, p);
189 current_pop -= p;
190 // Make sure all walkers are accounted for on the last node
191 if (i == NumContexts - 1 && current_pop > 0)
192 {
193 p += current_pop;
194 }
195 NumPerRank[i] = p;
196 }
197 //std::cout << "NumNodes = " << NumContexts << std::endl;
198 //std::cout << "TotalPop = " << Cur_pop << std::endl;
199 //output_vector("Start: ",NumPerRank);
200
201 std::vector<int> NewNum = NumPerRank;
202 for (int me = 0; me < NumContexts; me++)
203 {
204 std::vector<int> minus;
205 std::vector<int> plus;
206
207 determineNewWalkerPopulation(Cur_pop, NumContexts, me, NumPerRank, minus, plus);
208 REQUIRE(minus.size() == plus.size());
209
210 for (int i = 0; i < plus.size(); i++)
211 {
212 if (me == plus[i])
213 NewNum[plus[i]]--;
214 }
215 for (int i = 0; i < minus.size(); i++)
216 {
217 if (me == minus[i])
218 NewNum[minus[i]]++;
219 }
220 }
221
222 std::vector<int> FairOffset;
223 FairDivideLow(Cur_pop, NumContexts, FairOffset);
224 for (int i = 0; i < NewNum.size(); i++)
225 {
226 int num = FairOffset[i + 1] - FairOffset[i];
227 REQUIRE(NewNum[i] == num);
228 }
229 }
230 }
231 #endif
232
233 #ifdef HAVE_MPI
234
235 struct WalkerControlMPITest
236 {
237 /** Currently only passes for 1,2, or 3 ranks
238 */
operator ()qmcplusplus::WalkerControlMPITest239 void operator()(bool use_nonblocking)
240 {
241 Communicate* c = OHMMS::Controller;
242 WalkerControlMPI wc(c);
243
244 wc.use_nonblocking = use_nonblocking;
245
246 MCWalkerConfiguration W; // Unused in the function
247 typedef MCWalkerConfiguration::Walker_t Walker_t;
248
249 //UPtrVector<Walker_t> walkers;
250 // Set up Cur_pop
251 // Set up good_w and bad_w
252
253 wc.Cur_pop = c->size();
254 for (int i = 0; i < c->size(); i++)
255 {
256 wc.NumPerRank[i] = 1;
257 }
258 // One walker on every node, should be no swapping
259 //walkers.push_back(std::make_unique<Walker_t>());
260 wc.good_w.push_back(new Walker_t());
261 wc.good_w[0]->ID = c->rank();
262 wc.ncopy_w.push_back(0);
263
264 wc.swapWalkersSimple(W);
265
266 REQUIRE(wc.good_w.size() == 1);
267 REQUIRE(wc.bad_w.size() == 0);
268
269
270 // 3 walkers on rank 0, 1 walker on others - should redistribute if
271 // there is more than one rank
272 if (c->size() > 1)
273 {
274 if (c->rank() == 0)
275 {
276 wc.good_w.push_back(new Walker_t());
277 wc.good_w.push_back(new Walker_t());
278
279 // Use the ID variable to check that the walker content was transmitted
280 wc.good_w[1]->ID = c->size();
281 wc.good_w[2]->ID = c->size() + 1;
282
283 wc.ncopy_w.push_back(0);
284 wc.ncopy_w.push_back(0);
285 }
286 wc.NumPerRank[0] = 3;
287 wc.Cur_pop += 2;
288
289 wc.swapWalkersSimple(W);
290
291 //std::cout << " Rank = " << c->rank() << " good size = " << wc.good_w.size() <<
292 // " ID = " << wc.good_w[0]->ID << std::endl;
293
294 if (c->rank() == c->size() - 2)
295 {
296 REQUIRE(wc.good_w.size() == 2);
297 // This check is a bit too restrictive - no guarantee the last walker was the
298 // one transmitted
299 bool okay1 = wc.good_w[1]->ID == c->size() || wc.good_w[1]->ID == c->size() + 1;
300 REQUIRE(okay1);
301 }
302 else if (c->rank() == c->size() - 1)
303 {
304 REQUIRE(wc.good_w.size() == 2);
305 bool okay2 = wc.good_w[1]->ID == c->size() || wc.good_w[1]->ID == c->size() + 1;
306 REQUIRE(okay2);
307 }
308 else
309 {
310 REQUIRE(wc.good_w.size() == 1);
311 REQUIRE(wc.good_w[0]->ID == c->rank());
312 }
313 wc.NumPerRank[0] = 1;
314 wc.NumPerRank[c->size() - 1] = 2;
315 wc.NumPerRank[c->size() - 2] = 2;
316 }
317
318
319 // And now the strange case
320 // 6 walkers on rank0, 2 on rank1, 2 on rank2
321 if (c->size() > 2)
322 {
323 if (c->rank() == 0)
324 {
325 wc.good_w.push_back(new Walker_t());
326 wc.good_w.push_back(new Walker_t());
327 // wc.good_w.push_back(new Walker_t());
328 // wc.good_w.push_back(new Walker_t());
329 int nwalkers_rank = wc.good_w.size();
330 wc.good_w[nwalkers_rank - 1]->ID = c->size() + 5;
331 wc.good_w[nwalkers_rank - 2]->ID = c->size() + 4;
332 // wc.good_w[nwalkers_rank - 3]->ID = c->size() + 3;
333 // wc.good_w[nwalkers_rank - 4]->ID = c->size() + 2;
334
335 wc.ncopy_w.push_back(2);
336 wc.ncopy_w.push_back(1);
337 // wc.ncopy_w.push_back(0);
338 // wc.ncopy_w.push_back(0);
339 }
340 else if (c->rank() == 1)
341 {
342 //wc.bad_w.push_back(wc.good_w[0]);
343 //wc.bad_w.push_back(wc.good_w[1]);
344 int nwalkers_rank = wc.good_w.size();
345 //wc.good_w.pop_back();
346 //wc.good_w.pop_back();
347 //wc.ncopy_w.pop_back();
348 //wc.ncopy_w.pop_back();
349 }
350 wc.NumPerRank[0] = 6;
351 wc.Cur_pop += 5;
352
353 reportWalkersPerRank(c, wc);
354
355 wc.swapWalkersSimple(W);
356
357 reportWalkersPerRank(c, wc);
358
359
360 // These are unique walkers
361 if (c->rank() == c->size() - 2)
362 {
363 CHECK(wc.good_w.size() == 3);
364 }
365 else if (c->rank() == c->size() - 1)
366 {
367 CHECK(wc.good_w.size() == 3);
368 }
369 else
370 {
371 CHECK(wc.good_w.size() == 2);
372 }
373
374 int walker_count = wc.copyWalkers(W);
375
376 reportWalkersPerRank(c, wc);
377
378 if (c->rank() == c->size() - 2)
379 {
380 CHECK(walker_count == 3);
381 }
382 else if (c->rank() == c->size() - 1)
383 {
384 CHECK(walker_count == 4);
385 }
386 else
387 {
388 CHECK(walker_count == 3);
389 }
390 }
391 }
392
393 private:
reportWalkersPerRankqmcplusplus::WalkerControlMPITest394 void reportWalkersPerRank(Communicate* c, WalkerControlMPI& wc)
395 {
396 std::vector<int> rank_walker_count(c->size(), 0);
397 rank_walker_count[c->rank()] = wc.good_w.size();
398 c->allreduce(rank_walker_count);
399 if (c->rank() == 0)
400 {
401 std::cout << "Walkers Per Rank (Total: " << wc.Cur_pop << ")\n";
402 for (int i = 0; i < rank_walker_count.size(); ++i)
403 {
404 std::cout << " " << i << " " << rank_walker_count[i] << "\n";
405 }
406 }
407 }
408 };
409
test_swap_walkers(bool use_nonblocking)410 void test_swap_walkers(bool use_nonblocking)
411 {
412 WalkerControlMPITest test;
413 test(use_nonblocking);
414 }
415
416 TEST_CASE("Walker control swap walkers blocking", "[drivers][walker_control]")
417 {
418 SECTION("blocking")
419 {
420 bool non_blocking = false;
421 test_swap_walkers(non_blocking);
422 }
423 }
424
425 TEST_CASE("Walker control swap walkers nonblocking", "[drivers][walker_control]")
426 {
427 SECTION("nonblocking")
428 {
429 bool non_blocking = true;
430 test_swap_walkers(non_blocking);
431 }
432 }
433
434 TEST_CASE("Walker control reconfiguration", "[drivers][walker_control]")
435 {
436 Communicate* c = OHMMS::Controller;
437 WalkerReconfigurationMPI wr(c);
438
439 wr.dN.resize(c->size());
440
441 wr.dN[c->rank()] = 0;
442 if (c->size() > 1)
443 {
444 wr.dN[0] = 1;
445 wr.dN[1] = -1;
446 }
447
448 MCWalkerConfiguration W;
449 W.createWalkers(1);
450 typedef MCWalkerConfiguration::Walker_t Walker_t;
451
452 if (c->rank() == 0)
453 {
454 W[0]->ID = 100;
455 }
456 else
457 {
458 W[0]->ID = 1;
459 }
460
461 std::vector<int> plus, minus;
462 if (c->size() > 1)
463 {
464 if (c->rank() == 0)
465 plus.push_back(0);
466 if (c->rank() == 1)
467 minus.push_back(0);
468 }
469
470 if (plus.size() > 0)
471 {
472 wr.sendWalkers(W, plus);
473 REQUIRE(W.WalkerList.size() == 1);
474 REQUIRE(W[0]->ID == 100);
475 }
476
477 if (minus.size() > 0)
478 {
479 wr.recvWalkers(W, minus);
480 REQUIRE(W.WalkerList.size() == 1);
481 // Use ID and ParentID to check the walker was transmitted
482 REQUIRE(W[0]->ID == 1 + c->size());
483 REQUIRE(W[0]->ParentID == 100);
484 }
485 }
486
487 #endif
488 } // namespace qmcplusplus
489