1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2017 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
8 //
9 // File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
10 //////////////////////////////////////////////////////////////////////////////////////
11 
12 
13 /** @file RunTimeManager.cpp
14  *  @brief Class for determining elapsed run time enabling simulations to adjust to time limits.
15 
16  */
17 #include "RunTimeManager.h"
18 #include <sstream>
19 #include <fstream>
20 #include <cstdio>
21 
22 namespace qmcplusplus
23 {
24 RunTimeManager<CPUClock> run_time_manager;
25 
26 template class RunTimeManager<CPUClock>;
27 template class RunTimeManager<FakeCPUClock>;
28 
29 template<class CLOCK>
LoopTimer()30 LoopTimer<CLOCK>::LoopTimer() : nloop(0), ticking(false), start_time(0.0), total_time(0.0)
31 {}
32 
33 template<class CLOCK>
start()34 void LoopTimer<CLOCK>::start()
35 {
36   if (ticking)
37     throw std::runtime_error("LoopTimer started already!");
38   start_time = CLOCK()();
39   ticking    = true;
40 }
41 
42 template<class CLOCK>
stop()43 void LoopTimer<CLOCK>::stop()
44 {
45   if (!ticking)
46     throw std::runtime_error("LoopTimer didn't start but called stop!");
47   nloop++;
48   total_time += CLOCK()() - start_time;
49   ticking = false;
50 }
51 
52 template<class CLOCK>
get_time_per_iteration() const53 double LoopTimer<CLOCK>::get_time_per_iteration() const
54 {
55   if (nloop > 0)
56     return total_time / nloop;
57   return 0.0;
58 }
59 
60 template class LoopTimer<CPUClock>;
61 template class LoopTimer<FakeCPUClock>;
62 
63 template<class CLOCK>
RunTimeControl(RunTimeManager<CLOCK> & rm,int maxCPUSecs,const std::string & stop_file_prefix,bool cleanup)64 RunTimeControl<CLOCK>::RunTimeControl(RunTimeManager<CLOCK>& rm,
65                                       int maxCPUSecs,
66                                       const std::string& stop_file_prefix,
67                                       bool cleanup)
68     : MaxCPUSecs(maxCPUSecs),
69       runtimeManager(rm),
70       stop_filename_(stop_file_prefix + ".STOP"),
71       stop_status_(StopStatus::CONTINUE)
72 {
73   if (stop_file_prefix.empty())
74     throw std::runtime_error("Stop control filename prefix must not be empty!");
75 
76   if (cleanup)
77   {
78     std::remove(stop_filename_.c_str());
79     if (std::ifstream(stop_filename_.c_str()))
80       throw std::runtime_error("Failed to delete the existing stop control file \"" + stop_filename_ +
81                                "\", cannot continue!");
82   }
83 
84   m_runtime_safety_padding = 30.0; // generous 30 seconds to allow for shut down?
85   m_loop_margin            = 1.1;  // 10% margin on average loop time?
86 }
87 
88 template<class CLOCK>
enough_time_for_next_iteration(LoopTimer<CLOCK> & loop_timer)89 bool RunTimeControl<CLOCK>::enough_time_for_next_iteration(LoopTimer<CLOCK>& loop_timer)
90 {
91   m_loop_time = loop_timer.get_time_per_iteration();
92   m_elapsed   = runtimeManager.elapsed();
93 
94   if (m_elapsed >= MaxCPUSecs)
95   {
96     stop_status_ = StopStatus::MAX_SECONDS_PASSED;
97     return false;
98   }
99 
100   m_remaining      = MaxCPUSecs - m_elapsed;
101   bool enough_time = true;
102   if ((m_loop_margin * m_loop_time + m_runtime_safety_padding) > m_remaining)
103     enough_time = false;
104 
105   stop_status_ = StopStatus::NOT_ENOUGH_TIME;
106   return enough_time;
107 }
108 
109 template<class CLOCK>
stop_file_requested()110 bool RunTimeControl<CLOCK>::stop_file_requested()
111 {
112   if (std::ifstream(stop_filename_.c_str()))
113   {
114     stop_status_ = StopStatus::STOP_FILE;
115     return true;
116   }
117   else
118     return false;
119 }
120 
121 template<class CLOCK>
checkStop(LoopTimer<CLOCK> & loop_timer)122 bool RunTimeControl<CLOCK>::checkStop(LoopTimer<CLOCK>& loop_timer)
123 {
124   bool need_to_stop = false;
125   need_to_stop |= !enough_time_for_next_iteration(loop_timer);
126   need_to_stop |= stop_file_requested();
127   return need_to_stop;
128 }
129 
130 template<class CLOCK>
generateStopMessage(const std::string & driverName,int block) const131 std::string RunTimeControl<CLOCK>::generateStopMessage(const std::string& driverName, int block) const
132 {
133   std::stringstream log;
134   log << "RunTimeControl takes action in " << driverName << " driver." << std::endl;
135   if (stop_status_ == StopStatus::MAX_SECONDS_PASSED)
136     log << "Time limit reached. Stopping after block " << block << std::endl
137         << "Hard limit (seconds) " << MaxCPUSecs << ", elapsed (seconds) " << m_elapsed << std::endl;
138   else if (stop_status_ == StopStatus::NOT_ENOUGH_TIME)
139   {
140     log << "Insufficient time for next block. Stopping after block " << block << std::endl;
141     log << "  Iteration time per " << driverName << " block (seconds) = " << m_loop_time << std::endl;
142     log << "  Elapsed   time (seconds) = " << m_elapsed << std::endl;
143     log << "  Remaining time (seconds) = " << m_remaining << std::endl;
144   }
145   else if (stop_status_ == StopStatus::STOP_FILE)
146     log << "Stop requested from the control file \"" + stop_filename_ + "\", stopping after block " << block
147         << std::endl;
148   else
149     throw std::runtime_error("Unidentified stop status!");
150 
151   return log.str();
152 }
153 
154 template class RunTimeControl<CPUClock>;
155 template class RunTimeControl<FakeCPUClock>;
156 
157 } // namespace qmcplusplus
158