1 //////////////////////////////////////////////////////////////////////////////////////
2 // This file is distributed under the University of Illinois/NCSA Open Source License.
3 // See LICENSE file in top directory for details.
4 //
5 // Copyright (c) 2017 Jeongnim Kim and QMCPACK developers.
6 //
7 // File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
8 //
9 // File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory
10 //////////////////////////////////////////////////////////////////////////////////////
11
12
13 /** @file RunTimeManager.cpp
14 * @brief Class for determining elapsed run time enabling simulations to adjust to time limits.
15
16 */
17 #include "RunTimeManager.h"
18 #include <sstream>
19 #include <fstream>
20 #include <cstdio>
21
22 namespace qmcplusplus
23 {
/// Namespace-scope RunTimeManager object, instantiated with CPUClock.
RunTimeManager<CPUClock> run_time_manager;

/// Explicit instantiation definitions of RunTimeManager for the two clock types used in this file.
/// NOTE(review): FakeCPUClock is presumably a deterministic clock for unit tests -- confirm in RunTimeManager.h.
template class RunTimeManager<CPUClock>;
template class RunTimeManager<FakeCPUClock>;
28
29 template<class CLOCK>
LoopTimer()30 LoopTimer<CLOCK>::LoopTimer() : nloop(0), ticking(false), start_time(0.0), total_time(0.0)
31 {}
32
33 template<class CLOCK>
start()34 void LoopTimer<CLOCK>::start()
35 {
36 if (ticking)
37 throw std::runtime_error("LoopTimer started already!");
38 start_time = CLOCK()();
39 ticking = true;
40 }
41
42 template<class CLOCK>
stop()43 void LoopTimer<CLOCK>::stop()
44 {
45 if (!ticking)
46 throw std::runtime_error("LoopTimer didn't start but called stop!");
47 nloop++;
48 total_time += CLOCK()() - start_time;
49 ticking = false;
50 }
51
52 template<class CLOCK>
get_time_per_iteration() const53 double LoopTimer<CLOCK>::get_time_per_iteration() const
54 {
55 if (nloop > 0)
56 return total_time / nloop;
57 return 0.0;
58 }
59
/// Explicit instantiation definitions of LoopTimer for the two clock types used in this file,
/// so the member definitions above are emitted from this translation unit.
template class LoopTimer<CPUClock>;
template class LoopTimer<FakeCPUClock>;
62
63 template<class CLOCK>
RunTimeControl(RunTimeManager<CLOCK> & rm,int maxCPUSecs,const std::string & stop_file_prefix,bool cleanup)64 RunTimeControl<CLOCK>::RunTimeControl(RunTimeManager<CLOCK>& rm,
65 int maxCPUSecs,
66 const std::string& stop_file_prefix,
67 bool cleanup)
68 : MaxCPUSecs(maxCPUSecs),
69 runtimeManager(rm),
70 stop_filename_(stop_file_prefix + ".STOP"),
71 stop_status_(StopStatus::CONTINUE)
72 {
73 if (stop_file_prefix.empty())
74 throw std::runtime_error("Stop control filename prefix must not be empty!");
75
76 if (cleanup)
77 {
78 std::remove(stop_filename_.c_str());
79 if (std::ifstream(stop_filename_.c_str()))
80 throw std::runtime_error("Failed to delete the existing stop control file \"" + stop_filename_ +
81 "\", cannot continue!");
82 }
83
84 m_runtime_safety_padding = 30.0; // generous 30 seconds to allow for shut down?
85 m_loop_margin = 1.1; // 10% margin on average loop time?
86 }
87
88 template<class CLOCK>
enough_time_for_next_iteration(LoopTimer<CLOCK> & loop_timer)89 bool RunTimeControl<CLOCK>::enough_time_for_next_iteration(LoopTimer<CLOCK>& loop_timer)
90 {
91 m_loop_time = loop_timer.get_time_per_iteration();
92 m_elapsed = runtimeManager.elapsed();
93
94 if (m_elapsed >= MaxCPUSecs)
95 {
96 stop_status_ = StopStatus::MAX_SECONDS_PASSED;
97 return false;
98 }
99
100 m_remaining = MaxCPUSecs - m_elapsed;
101 bool enough_time = true;
102 if ((m_loop_margin * m_loop_time + m_runtime_safety_padding) > m_remaining)
103 enough_time = false;
104
105 stop_status_ = StopStatus::NOT_ENOUGH_TIME;
106 return enough_time;
107 }
108
109 template<class CLOCK>
stop_file_requested()110 bool RunTimeControl<CLOCK>::stop_file_requested()
111 {
112 if (std::ifstream(stop_filename_.c_str()))
113 {
114 stop_status_ = StopStatus::STOP_FILE;
115 return true;
116 }
117 else
118 return false;
119 }
120
121 template<class CLOCK>
checkStop(LoopTimer<CLOCK> & loop_timer)122 bool RunTimeControl<CLOCK>::checkStop(LoopTimer<CLOCK>& loop_timer)
123 {
124 bool need_to_stop = false;
125 need_to_stop |= !enough_time_for_next_iteration(loop_timer);
126 need_to_stop |= stop_file_requested();
127 return need_to_stop;
128 }
129
130 template<class CLOCK>
generateStopMessage(const std::string & driverName,int block) const131 std::string RunTimeControl<CLOCK>::generateStopMessage(const std::string& driverName, int block) const
132 {
133 std::stringstream log;
134 log << "RunTimeControl takes action in " << driverName << " driver." << std::endl;
135 if (stop_status_ == StopStatus::MAX_SECONDS_PASSED)
136 log << "Time limit reached. Stopping after block " << block << std::endl
137 << "Hard limit (seconds) " << MaxCPUSecs << ", elapsed (seconds) " << m_elapsed << std::endl;
138 else if (stop_status_ == StopStatus::NOT_ENOUGH_TIME)
139 {
140 log << "Insufficient time for next block. Stopping after block " << block << std::endl;
141 log << " Iteration time per " << driverName << " block (seconds) = " << m_loop_time << std::endl;
142 log << " Elapsed time (seconds) = " << m_elapsed << std::endl;
143 log << " Remaining time (seconds) = " << m_remaining << std::endl;
144 }
145 else if (stop_status_ == StopStatus::STOP_FILE)
146 log << "Stop requested from the control file \"" + stop_filename_ + "\", stopping after block " << block
147 << std::endl;
148 else
149 throw std::runtime_error("Unidentified stop status!");
150
151 return log.str();
152 }
153
/// Explicit instantiation definitions of RunTimeControl for the two clock types used in this file,
/// so the member definitions above are emitted from this translation unit.
template class RunTimeControl<CPUClock>;
template class RunTimeControl<FakeCPUClock>;
156
157 } // namespace qmcplusplus
158