1 #ifndef CONNECT_SERVICES___GRID_GLOBALS__HPP
2 #define CONNECT_SERVICES___GRID_GLOBALS__HPP
3
4 /* $Id: grid_globals.hpp 574016 2018-11-05 16:55:15Z sadyrovr $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Authors: Maxim Didenko, Dmitry Kazimirov
30 *
31 */
32
33 #include <connect/services/netschedule_api.hpp>
34 #include <connect/services/grid_worker.hpp>
35
36 #include <corelib/ncbimisc.hpp>
37
38
39 BEGIN_NCBI_SCOPE
40
41
42 /// Grid worker global variables
43 ///
44 /// @sa CNetScheduleAPI
45 ///
46
47
48 /// @internal
49 class CGridGlobals;
50 class NCBI_XCONNECT_EXPORT CWNJobWatcher : public IWorkerNodeJobWatcher
51 {
52 public:
53 CWNJobWatcher();
54 virtual ~CWNJobWatcher();
55
56 virtual void Notify(const CWorkerNodeJobContext& job, EEvent event);
57
58 void Print(CNcbiOstream& os) const;
GetJobsRunningNumber() const59 unsigned GetJobsRunningNumber() const
60 { return (unsigned) m_ActiveJobs.size(); }
61
SetMaxJobsAllowed(unsigned int max_jobs_allowed)62 void SetMaxJobsAllowed(unsigned int max_jobs_allowed)
63 { m_MaxJobsAllowed = max_jobs_allowed; }
SetMaxFailuresAllowed(unsigned int max_failures_allowed)64 void SetMaxFailuresAllowed(unsigned int max_failures_allowed)
65 { m_MaxFailuresAllowed = max_failures_allowed; }
SetInfiniteLoopTime(unsigned int infinite_loop_time)66 void SetInfiniteLoopTime(unsigned int infinite_loop_time)
67 { m_InfiniteLoopTime = infinite_loop_time; }
68
69
70 void CheckForInfiniteLoop();
71
72 private:
73 unsigned int m_JobsStarted;
74 unsigned int m_JobsSucceeded;
75 unsigned int m_JobsFailed;
76 unsigned int m_JobsReturned;
77 unsigned int m_JobsRescheduled;
78 unsigned int m_JobsCanceled;
79 unsigned int m_JobsLost;
80 unsigned int m_MaxJobsAllowed;
81 unsigned int m_MaxFailuresAllowed;
82 unsigned int m_InfiniteLoopTime;
83 struct SJobActivity {
84 CStopWatch elasped_time;
85 bool is_stuck;
SJobActivityCWNJobWatcher::SJobActivity86 SJobActivity(CStopWatch et, bool is) : elasped_time(et), is_stuck(is) {}
SJobActivityCWNJobWatcher::SJobActivity87 SJobActivity() : elasped_time(CStopWatch(CStopWatch::eStart)), is_stuck(false) {}
88 };
89
90 typedef map<CWorkerNodeJobContext*, SJobActivity> TActiveJobs;
91 TActiveJobs m_ActiveJobs;
92 mutable CMutex m_ActiveJobsMutex;
93
94 friend class CGridGlobals;
95 void x_KillNode(CGridWorkerNode);
96
97 private:
98 CWNJobWatcher(const CWNJobWatcher&);
99 CWNJobWatcher& operator=(const CWNJobWatcher&);
100 };
101
102
103 /// @internal
104 class NCBI_XCONNECT_EXPORT CGridGlobals
105 {
106 public:
107 CGridGlobals();
108 ~CGridGlobals();
109
110 static CGridGlobals& GetInstance();
111
112 unsigned int GetNewJobNumber();
113
ReuseJobObject() const114 bool ReuseJobObject() const {return m_ReuseJobObject;}
SetReuseJobObject(bool value)115 void SetReuseJobObject(bool value) {m_ReuseJobObject = value;}
SetWorker(SGridWorkerNodeImpl * worker)116 void SetWorker(SGridWorkerNodeImpl* worker) {m_Worker = worker;}
SetUDPPort(unsigned short udp_port)117 void SetUDPPort(unsigned short udp_port) {m_UDPPort = udp_port;}
118
119 /// Request node shutdown
RequestShutdown(CNetScheduleAdmin::EShutdownLevel level)120 void RequestShutdown(CNetScheduleAdmin::EShutdownLevel level)
121 {
122 m_ShutdownLevel = level;
123 InterruptUDPPortListening();
124 }
RequestShutdown(CNetScheduleAdmin::EShutdownLevel level,int exit_code)125 void RequestShutdown(CNetScheduleAdmin::EShutdownLevel level,
126 int exit_code)
127 {
128 m_ShutdownLevel = level;
129 m_ExitCode = exit_code;
130 InterruptUDPPortListening();
131 }
132 bool IsShuttingDown();
133
134 /// Check if shutdown was requested.
135 ///
GetShutdownLevel(void)136 CNetScheduleAdmin::EShutdownLevel GetShutdownLevel(void)
137 { return m_ShutdownLevel; }
138
SetExitCode(int exit_code)139 void SetExitCode(int exit_code) { m_ExitCode = exit_code; }
GetExitCode() const140 int GetExitCode() const { return m_ExitCode; }
141
142 CWNJobWatcher& GetJobWatcher();
143
GetStartTime() const144 const CTime& GetStartTime() const { return m_StartTime; }
145
146 void KillNode();
147
148 void InterruptUDPPortListening();
149
150 private:
151 CAtomicCounter_WithAutoInit m_JobsStarted;
152 bool m_ReuseJobObject;
153
154 volatile CNetScheduleAdmin::EShutdownLevel m_ShutdownLevel;
155 volatile int m_ExitCode;
156 unique_ptr<CWNJobWatcher> m_JobWatcher;
157 const CTime m_StartTime;
158 SGridWorkerNodeImpl* m_Worker;
159 unsigned short m_UDPPort;
160
161 CGridGlobals(const CGridGlobals&);
162 CGridGlobals& operator=(const CGridGlobals&);
163 };
164
IsShuttingDown()165 inline bool CGridGlobals::IsShuttingDown()
166 {
167 return m_ShutdownLevel != CNetScheduleAdmin::eNoShutdown;
168 }
169
170 END_NCBI_SCOPE
171
172 #endif // CONNECT_SERVICES___GRID_GLOBALS__HPP
173