1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2020 QMCPACK developers.
6 //
7 // File developed by: Peter Doak, doakpw@ornl.gov, Oak Ridge National Laboratory
8 //                    Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
9 //
10 // File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
11 //////////////////////////////////////////////////////////////////////////////////////
12 
13 #include "catch.hpp"
14 #include "Utilities/RandomGenerator.h"
15 #include "OhmmsData/Libxml2Doc.h"
16 #include "OhmmsPETE/OhmmsMatrix.h"
17 #include "Particle/ParticleSet.h"
18 #include "Particle/MCWalkerConfiguration.h"
19 #include "QMCDrivers/WalkerControlBase.h"
20 #ifdef HAVE_MPI
21 #include "QMCDrivers/DMC/WalkerControlMPI.h"
22 #include "QMCDrivers/DMC/WalkerReconfigurationMPI.h"
23 #endif
24 
25 #include <stdio.h>
26 #include <string>
27 #include <random>
28 
29 using std::string;
30 
31 namespace qmcplusplus
32 {
output_vector(const std::string & name,std::vector<int> & vec)33 void output_vector(const std::string& name, std::vector<int>& vec)
34 {
35   std::cout << name;
36   for (int i = 0; i < vec.size(); i++)
37   {
38     std::cout << vec[i] << " ";
39   }
40   std::cout << std::endl;
41 }
42 
43 // uncomment the std::cout and output_vector lines to see the walker assignments
44 TEST_CASE("Walker control assign walkers", "[drivers][walker_control]")
45 {
46   int Cur_pop                 = 8;
47   int NumContexts             = 4;
48   std::vector<int> FairOffset(NumContexts + 1);
49 
50   // The in loop copy is necessary to support the assert at the end of the swaps.
51   // This was important for debugging but will go in a future PR as part of cleaning
52   // update determineNewWalkerPopulation
53   std::vector<int> NumPerRank = {4, 4, 0, 0};
54   std::vector<int> NewNum = NumPerRank;
55   for (int me = 0; me < NumContexts; me++)
56   {
57     std::vector<int> minus;
58     std::vector<int> plus;
59     std::vector<int> num_per_rank = NumPerRank;
60 
61     //std::cout << "For processor number " << me << std::endl;
62     WalkerControlMPI::determineNewWalkerPopulation(Cur_pop, NumContexts, me, num_per_rank, FairOffset, minus, plus);
63 
64     REQUIRE(minus.size() == plus.size());
65     //output_vector("  Minus: ", minus);
66 
67     //output_vector("  Plus: ", plus);
68 
69     for (int i = 0; i < plus.size(); i++)
70     {
71       if (me == plus[i])
72         NewNum[plus[i]]--;
73     }
74     for (int i = 0; i < minus.size(); i++)
75     {
76       if (me == minus[i])
77         NewNum[minus[i]]++;
78     }
79   }
80   //output_vector("New num per node: ", NewNum);
81 
82   for (int i = 0; i < NewNum.size(); i++)
83   {
84     int num = FairOffset[i + 1] - FairOffset[i];
85     REQUIRE(NewNum[i] == num);
86   }
87 }
88 
89 TEST_CASE("WalkerControl round trip index conversions", "[drivers][walker_control]")
90 {
91   std::vector<int> walker_counts; //= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 23, 37};
92   for (int i = 0; i < 1000; ++i)
93     walker_counts.push_back(i);
94   for (int i = 0; i < 1000000; i += 1000)
95     walker_counts.push_back(i);
96   for (int i = 0; i < 100000000; i += 100000)
97     walker_counts.push_back(i);
98 
99   std::vector<QMCTraits::FullPrecRealType> fp_counts(walker_counts.size());
100   std::vector<int> walker_count_results(walker_counts.size());
101   for (int iw = 0; iw < walker_counts.size(); ++iw)
102   {
103     fp_counts[iw] = walker_counts[iw];
104   }
105   for (int iw = 0; iw < fp_counts.size(); ++iw)
106   {
107     walker_count_results[iw] = static_cast<int>(fp_counts[iw]);
108   }
109   bool all_pass = true;
110   for (int iw = 0; iw < walker_counts.size(); ++iw)
111   {
112     all_pass &= (walker_counts[iw] == walker_count_results[iw]);
113   }
114   REQUIRE(all_pass);
115 }
116 
117 // uncomment the std::cout and output_vector lines to see the walker assignments
118 TEST_CASE("Walker control assign walkers odd ranks", "[drivers][walker_control]")
119 {
120   int Cur_pop                 = 9;
121   int NumContexts             = 3;
122   std::vector<int> NumPerRank = {5, 2, 2};
123   std::vector<int> FairOffset(NumContexts + 1);
124 
125   std::vector<int> NewNum = NumPerRank;
126   for (int me = 0; me < NumContexts; me++)
127   {
128     std::vector<int> minus;
129     std::vector<int> plus;
130     std::vector<int> num_per_rank = NumPerRank;
131     //std::cout << "For processor number " << me << std::endl;
132     WalkerControlMPI::determineNewWalkerPopulation(Cur_pop, NumContexts, me, num_per_rank, FairOffset, minus, plus);
133 
134     REQUIRE(minus.size() == plus.size());
135     //output_vector("  Minus: ", minus);
136 
137     //output_vector("  Plus: ", plus);
138 
139     for (int i = 0; i < plus.size(); i++)
140     {
141       if (me == plus[i])
142         NewNum[plus[i]]--;
143     }
144     for (int i = 0; i < minus.size(); i++)
145     {
146       if (me == minus[i])
147         NewNum[minus[i]]++;
148     }
149   }
150   //output_vector("New num per node: ", NewNum);
151 
152   for (int i = 0; i < NewNum.size(); i++)
153   {
154     int num = FairOffset[i + 1] - FairOffset[i];
155     REQUIRE(NewNum[i] == num);
156   }
157 }
158 
159 #ifdef PROPERTY_TESTING
160 // Eventually will create some way to build and run property-based tests, which
161 // are tests that use random inputs and verify that certain properties hold true.
162 //
163 // In this case, the test constructs random number of processors with a random
164 // number of walkers on each node. The test checks two properties - that the
165 // plus and minus lists have the same size, and that each node ultimately has
166 // the number of walkers chosen by FairDivideLow.
167 
168 TEST_CASE("Walker control assign walkers many", "[drivers][walker_control][property]")
169 {
170   // Use random device for seed for coverage
171   //std::random_device rd;
172   //std::mt19937 mt(rd());
173   // Use fixed seed for reproducibility
174   std::mt19937 mt(100);
175   std::uniform_int_distribution<int> NumNodes(1, 1000);
176   std::uniform_int_distribution<int> TotalPop(0, 1000);
177 
178   for (int nt = 0; nt < 1000; nt++)
179   {
180     int NumContexts = NumNodes(mt);
181     int Cur_pop     = TotalPop(mt);
182     std::uniform_int_distribution<int> WalkerPop(0, 2 * Cur_pop / NumContexts);
183     std::vector<int> NumPerRank(NumContexts);
184     int current_pop = Cur_pop;
185     for (int i = 0; i < NumContexts; i++)
186     {
187       int p = WalkerPop(mt);
188       p     = std::min(current_pop, p);
189       current_pop -= p;
190       // Make sure all walkers are accounted for on the last node
191       if (i == NumContexts - 1 && current_pop > 0)
192       {
193         p += current_pop;
194       }
195       NumPerRank[i] = p;
196     }
197     //std::cout << "NumNodes = " << NumContexts << std::endl;
198     //std::cout << "TotalPop = " << Cur_pop << std::endl;
199     //output_vector("Start: ",NumPerRank);
200 
201     std::vector<int> NewNum = NumPerRank;
202     for (int me = 0; me < NumContexts; me++)
203     {
204       std::vector<int> minus;
205       std::vector<int> plus;
206 
207       determineNewWalkerPopulation(Cur_pop, NumContexts, me, NumPerRank, minus, plus);
208       REQUIRE(minus.size() == plus.size());
209 
210       for (int i = 0; i < plus.size(); i++)
211       {
212         if (me == plus[i])
213           NewNum[plus[i]]--;
214       }
215       for (int i = 0; i < minus.size(); i++)
216       {
217         if (me == minus[i])
218           NewNum[minus[i]]++;
219       }
220     }
221 
222     std::vector<int> FairOffset;
223     FairDivideLow(Cur_pop, NumContexts, FairOffset);
224     for (int i = 0; i < NewNum.size(); i++)
225     {
226       int num = FairOffset[i + 1] - FairOffset[i];
227       REQUIRE(NewNum[i] == num);
228     }
229   }
230 }
231 #endif
232 
233 #ifdef HAVE_MPI
234 
235 struct WalkerControlMPITest
236 {
237   /** Currently only passes for 1,2, or 3 ranks
238    */
operator ()qmcplusplus::WalkerControlMPITest239   void operator()(bool use_nonblocking)
240   {
241     Communicate* c = OHMMS::Controller;
242     WalkerControlMPI wc(c);
243 
244     wc.use_nonblocking = use_nonblocking;
245 
246     MCWalkerConfiguration W; // Unused in the function
247     typedef MCWalkerConfiguration::Walker_t Walker_t;
248 
249     //UPtrVector<Walker_t> walkers;
250     // Set up Cur_pop
251     // Set up good_w and bad_w
252 
253     wc.Cur_pop = c->size();
254     for (int i = 0; i < c->size(); i++)
255     {
256       wc.NumPerRank[i] = 1;
257     }
258     // One walker on every node, should be no swapping
259     //walkers.push_back(std::make_unique<Walker_t>());
260     wc.good_w.push_back(new Walker_t());
261     wc.good_w[0]->ID = c->rank();
262     wc.ncopy_w.push_back(0);
263 
264     wc.swapWalkersSimple(W);
265 
266     REQUIRE(wc.good_w.size() == 1);
267     REQUIRE(wc.bad_w.size() == 0);
268 
269 
270     // 3 walkers on rank 0, 1 walker on others - should redistribute if
271     //   there is more than one rank
272     if (c->size() > 1)
273     {
274       if (c->rank() == 0)
275       {
276         wc.good_w.push_back(new Walker_t());
277         wc.good_w.push_back(new Walker_t());
278 
279         // Use the ID variable to check that the walker content was transmitted
280         wc.good_w[1]->ID = c->size();
281         wc.good_w[2]->ID = c->size() + 1;
282 
283         wc.ncopy_w.push_back(0);
284         wc.ncopy_w.push_back(0);
285       }
286       wc.NumPerRank[0] = 3;
287       wc.Cur_pop += 2;
288 
289       wc.swapWalkersSimple(W);
290 
291       //std::cout << " Rank = " << c->rank() << " good size = " << wc.good_w.size() <<
292       //          " ID = " << wc.good_w[0]->ID << std::endl;
293 
294       if (c->rank() == c->size() - 2)
295       {
296         REQUIRE(wc.good_w.size() == 2);
297         // This check is a bit too restrictive - no guarantee the last walker was the
298         //  one transmitted
299         bool okay1 = wc.good_w[1]->ID == c->size() || wc.good_w[1]->ID == c->size() + 1;
300         REQUIRE(okay1);
301       }
302       else if (c->rank() == c->size() - 1)
303       {
304         REQUIRE(wc.good_w.size() == 2);
305         bool okay2 = wc.good_w[1]->ID == c->size() || wc.good_w[1]->ID == c->size() + 1;
306         REQUIRE(okay2);
307       }
308       else
309       {
310         REQUIRE(wc.good_w.size() == 1);
311         REQUIRE(wc.good_w[0]->ID == c->rank());
312       }
313       wc.NumPerRank[0]             = 1;
314       wc.NumPerRank[c->size() - 1] = 2;
315       wc.NumPerRank[c->size() - 2] = 2;
316     }
317 
318 
319     // And now the strange case
320     // 6 walkers on rank0, 2 on rank1, 2 on rank2
321     if (c->size() > 2)
322     {
323       if (c->rank() == 0)
324       {
325         wc.good_w.push_back(new Walker_t());
326         wc.good_w.push_back(new Walker_t());
327         // wc.good_w.push_back(new Walker_t());
328         // wc.good_w.push_back(new Walker_t());
329         int nwalkers_rank                = wc.good_w.size();
330         wc.good_w[nwalkers_rank - 1]->ID = c->size() + 5;
331         wc.good_w[nwalkers_rank - 2]->ID = c->size() + 4;
332         // wc.good_w[nwalkers_rank - 3]->ID = c->size() + 3;
333         // wc.good_w[nwalkers_rank - 4]->ID = c->size() + 2;
334 
335         wc.ncopy_w.push_back(2);
336         wc.ncopy_w.push_back(1);
337         // wc.ncopy_w.push_back(0);
338         // wc.ncopy_w.push_back(0);
339       }
340       else if (c->rank() == 1)
341       {
342         //wc.bad_w.push_back(wc.good_w[0]);
343         //wc.bad_w.push_back(wc.good_w[1]);
344         int nwalkers_rank = wc.good_w.size();
345         //wc.good_w.pop_back();
346         //wc.good_w.pop_back();
347         //wc.ncopy_w.pop_back();
348         //wc.ncopy_w.pop_back();
349       }
350       wc.NumPerRank[0] = 6;
351       wc.Cur_pop += 5;
352 
353       reportWalkersPerRank(c, wc);
354 
355       wc.swapWalkersSimple(W);
356 
357       reportWalkersPerRank(c, wc);
358 
359 
360       // These are unique walkers
361       if (c->rank() == c->size() - 2)
362       {
363         CHECK(wc.good_w.size() == 3);
364       }
365       else if (c->rank() == c->size() - 1)
366       {
367         CHECK(wc.good_w.size() == 3);
368       }
369       else
370       {
371         CHECK(wc.good_w.size() == 2);
372       }
373 
374       int walker_count = wc.copyWalkers(W);
375 
376       reportWalkersPerRank(c, wc);
377 
378       if (c->rank() == c->size() - 2)
379       {
380         CHECK(walker_count == 3);
381       }
382       else if (c->rank() == c->size() - 1)
383       {
384         CHECK(walker_count == 4);
385       }
386       else
387       {
388         CHECK(walker_count == 3);
389       }
390     }
391   }
392 
393 private:
reportWalkersPerRankqmcplusplus::WalkerControlMPITest394   void reportWalkersPerRank(Communicate* c, WalkerControlMPI& wc)
395   {
396     std::vector<int> rank_walker_count(c->size(), 0);
397     rank_walker_count[c->rank()] = wc.good_w.size();
398     c->allreduce(rank_walker_count);
399     if (c->rank() == 0)
400     {
401       std::cout << "Walkers Per Rank (Total: " << wc.Cur_pop << ")\n";
402       for (int i = 0; i < rank_walker_count.size(); ++i)
403       {
404         std::cout << " " << i << "  " << rank_walker_count[i] << "\n";
405       }
406     }
407   }
408 };
409 
test_swap_walkers(bool use_nonblocking)410 void test_swap_walkers(bool use_nonblocking)
411 {
412   WalkerControlMPITest test;
413   test(use_nonblocking);
414 }
415 
416 TEST_CASE("Walker control swap walkers blocking", "[drivers][walker_control]")
417 {
418   SECTION("blocking")
419   {
420     bool non_blocking = false;
421     test_swap_walkers(non_blocking);
422   }
423 }
424 
425 TEST_CASE("Walker control swap walkers nonblocking", "[drivers][walker_control]")
426 {
427   SECTION("nonblocking")
428   {
429     bool non_blocking = true;
430     test_swap_walkers(non_blocking);
431   }
432 }
433 
434 TEST_CASE("Walker control reconfiguration", "[drivers][walker_control]")
435 {
436   Communicate* c = OHMMS::Controller;
437   WalkerReconfigurationMPI wr(c);
438 
439   wr.dN.resize(c->size());
440 
441   wr.dN[c->rank()] = 0;
442   if (c->size() > 1)
443   {
444     wr.dN[0] = 1;
445     wr.dN[1] = -1;
446   }
447 
448   MCWalkerConfiguration W;
449   W.createWalkers(1);
450   typedef MCWalkerConfiguration::Walker_t Walker_t;
451 
452   if (c->rank() == 0)
453   {
454     W[0]->ID = 100;
455   }
456   else
457   {
458     W[0]->ID = 1;
459   }
460 
461   std::vector<int> plus, minus;
462   if (c->size() > 1)
463   {
464     if (c->rank() == 0)
465       plus.push_back(0);
466     if (c->rank() == 1)
467       minus.push_back(0);
468   }
469 
470   if (plus.size() > 0)
471   {
472     wr.sendWalkers(W, plus);
473     REQUIRE(W.WalkerList.size() == 1);
474     REQUIRE(W[0]->ID == 100);
475   }
476 
477   if (minus.size() > 0)
478   {
479     wr.recvWalkers(W, minus);
480     REQUIRE(W.WalkerList.size() == 1);
481     // Use ID and ParentID to check the walker was transmitted
482     REQUIRE(W[0]->ID == 1 + c->size());
483     REQUIRE(W[0]->ParentID == 100);
484   }
485 }
486 
487 #endif
488 } // namespace qmcplusplus
489