1 /* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
2 
3 #include "icinga/cib.hpp"
4 #include "icinga/host.hpp"
5 #include "icinga/service.hpp"
6 #include "icinga/clusterevents.hpp"
7 #include "base/application.hpp"
8 #include "base/objectlock.hpp"
9 #include "base/utility.hpp"
10 #include "base/perfdatavalue.hpp"
11 #include "base/configtype.hpp"
12 #include "base/statsfunction.hpp"
13 
14 using namespace icinga;
15 
/*
 * Ring buffers backing the check statistics, one per combination of
 * active/passive and host/service checks. Each one is constructed with
 * 15 * 60, which matches the largest timespan (60 * 15) queried by
 * CIB::StatsFunc -- presumably one slot per second; TODO confirm against
 * the RingBuffer implementation.
 */
RingBuffer CIB::m_ActiveHostChecksStatistics(15 * 60);
RingBuffer CIB::m_ActiveServiceChecksStatistics(15 * 60);
RingBuffer CIB::m_PassiveHostChecksStatistics(15 * 60);
RingBuffer CIB::m_PassiveServiceChecksStatistics(15 * 60);
20 
UpdateActiveHostChecksStatistics(long tv,int num)21 void CIB::UpdateActiveHostChecksStatistics(long tv, int num)
22 {
23 	m_ActiveHostChecksStatistics.InsertValue(tv, num);
24 }
25 
UpdateActiveServiceChecksStatistics(long tv,int num)26 void CIB::UpdateActiveServiceChecksStatistics(long tv, int num)
27 {
28 	m_ActiveServiceChecksStatistics.InsertValue(tv, num);
29 }
30 
GetActiveHostChecksStatistics(long timespan)31 int CIB::GetActiveHostChecksStatistics(long timespan)
32 {
33 	return m_ActiveHostChecksStatistics.UpdateAndGetValues(Utility::GetTime(), timespan);
34 }
35 
GetActiveServiceChecksStatistics(long timespan)36 int CIB::GetActiveServiceChecksStatistics(long timespan)
37 {
38 	return m_ActiveServiceChecksStatistics.UpdateAndGetValues(Utility::GetTime(), timespan);
39 }
40 
UpdatePassiveHostChecksStatistics(long tv,int num)41 void CIB::UpdatePassiveHostChecksStatistics(long tv, int num)
42 {
43 	m_PassiveServiceChecksStatistics.InsertValue(tv, num);
44 }
45 
UpdatePassiveServiceChecksStatistics(long tv,int num)46 void CIB::UpdatePassiveServiceChecksStatistics(long tv, int num)
47 {
48 	m_PassiveServiceChecksStatistics.InsertValue(tv, num);
49 }
50 
GetPassiveHostChecksStatistics(long timespan)51 int CIB::GetPassiveHostChecksStatistics(long timespan)
52 {
53 	return m_PassiveHostChecksStatistics.UpdateAndGetValues(Utility::GetTime(), timespan);
54 }
55 
GetPassiveServiceChecksStatistics(long timespan)56 int CIB::GetPassiveServiceChecksStatistics(long timespan)
57 {
58 	return m_PassiveServiceChecksStatistics.UpdateAndGetValues(Utility::GetTime(), timespan);
59 }
60 
CalculateHostCheckStats()61 CheckableCheckStatistics CIB::CalculateHostCheckStats()
62 {
63 	double min_latency = -1, max_latency = 0, sum_latency = 0;
64 	int count_latency = 0;
65 	double min_execution_time = -1, max_execution_time = 0, sum_execution_time = 0;
66 	int count_execution_time = 0;
67 	bool checkresult = false;
68 
69 	for (const Host::Ptr& host : ConfigType::GetObjectsByType<Host>()) {
70 		ObjectLock olock(host);
71 
72 		CheckResult::Ptr cr = host->GetLastCheckResult();
73 
74 		if (!cr)
75 			continue;
76 
77 		/* set to true, we have a checkresult */
78 		checkresult = true;
79 
80 		/* latency */
81 		double latency = cr->CalculateLatency();
82 
83 		if (min_latency == -1 || latency < min_latency)
84 			min_latency = latency;
85 
86 		if (latency > max_latency)
87 			max_latency = latency;
88 
89 		sum_latency += latency;
90 		count_latency++;
91 
92 		/* execution_time */
93 		double execution_time = cr->CalculateExecutionTime();
94 
95 		if (min_execution_time == -1 || execution_time < min_execution_time)
96 			min_execution_time = execution_time;
97 
98 		if (execution_time > max_execution_time)
99 			max_execution_time = execution_time;
100 
101 		sum_execution_time += execution_time;
102 		count_execution_time++;
103 	}
104 
105 	if (!checkresult) {
106 		min_latency = 0;
107 		min_execution_time = 0;
108 	}
109 
110 	CheckableCheckStatistics ccs;
111 
112 	ccs.min_latency = min_latency;
113 	ccs.max_latency = max_latency;
114 	ccs.avg_latency = sum_latency / count_latency;
115 	ccs.min_execution_time = min_execution_time;
116 	ccs.max_execution_time = max_execution_time;
117 	ccs.avg_execution_time = sum_execution_time / count_execution_time;
118 
119 	return ccs;
120 }
121 
CalculateServiceCheckStats()122 CheckableCheckStatistics CIB::CalculateServiceCheckStats()
123 {
124 	double min_latency = -1, max_latency = 0, sum_latency = 0;
125 	int count_latency = 0;
126 	double min_execution_time = -1, max_execution_time = 0, sum_execution_time = 0;
127 	int count_execution_time = 0;
128 	bool checkresult = false;
129 
130 	for (const Service::Ptr& service : ConfigType::GetObjectsByType<Service>()) {
131 		ObjectLock olock(service);
132 
133 		CheckResult::Ptr cr = service->GetLastCheckResult();
134 
135 		if (!cr)
136 			continue;
137 
138 		/* set to true, we have a checkresult */
139 		checkresult = true;
140 
141 		/* latency */
142 		double latency = cr->CalculateLatency();
143 
144 		if (min_latency == -1 || latency < min_latency)
145 			min_latency = latency;
146 
147 		if (latency > max_latency)
148 			max_latency = latency;
149 
150 		sum_latency += latency;
151 		count_latency++;
152 
153 		/* execution_time */
154 		double execution_time = cr->CalculateExecutionTime();
155 
156 		if (min_execution_time == -1 || execution_time < min_execution_time)
157 			min_execution_time = execution_time;
158 
159 		if (execution_time > max_execution_time)
160 			max_execution_time = execution_time;
161 
162 		sum_execution_time += execution_time;
163 		count_execution_time++;
164 	}
165 
166 	if (!checkresult) {
167 		min_latency = 0;
168 		min_execution_time = 0;
169 	}
170 
171 	CheckableCheckStatistics ccs;
172 
173 	ccs.min_latency = min_latency;
174 	ccs.max_latency = max_latency;
175 	ccs.avg_latency = sum_latency / count_latency;
176 	ccs.min_execution_time = min_execution_time;
177 	ccs.max_execution_time = max_execution_time;
178 	ccs.avg_execution_time = sum_execution_time / count_execution_time;
179 
180 	return ccs;
181 }
182 
CalculateServiceStats()183 ServiceStatistics CIB::CalculateServiceStats()
184 {
185 	ServiceStatistics ss = {};
186 
187 	for (const Service::Ptr& service : ConfigType::GetObjectsByType<Service>()) {
188 		ObjectLock olock(service);
189 
190 		if (service->GetState() == ServiceOK)
191 			ss.services_ok++;
192 		if (service->GetState() == ServiceWarning)
193 			ss.services_warning++;
194 		if (service->GetState() == ServiceCritical)
195 			ss.services_critical++;
196 		if (service->GetState() == ServiceUnknown)
197 			ss.services_unknown++;
198 
199 		CheckResult::Ptr cr = service->GetLastCheckResult();
200 
201 		if (!cr)
202 			ss.services_pending++;
203 
204 		if (!service->IsReachable())
205 			ss.services_unreachable++;
206 
207 		if (service->IsFlapping())
208 			ss.services_flapping++;
209 		if (service->IsInDowntime())
210 			ss.services_in_downtime++;
211 		if (service->IsAcknowledged())
212 			ss.services_acknowledged++;
213 
214 		if (service->GetHandled())
215 			ss.services_handled++;
216 		if (service->GetProblem())
217 			ss.services_problem++;
218 	}
219 
220 	return ss;
221 }
222 
CalculateHostStats()223 HostStatistics CIB::CalculateHostStats()
224 {
225 	HostStatistics hs = {};
226 
227 	for (const Host::Ptr& host : ConfigType::GetObjectsByType<Host>()) {
228 		ObjectLock olock(host);
229 
230 		if (host->IsReachable()) {
231 			if (host->GetState() == HostUp)
232 				hs.hosts_up++;
233 			if (host->GetState() == HostDown)
234 				hs.hosts_down++;
235 		} else
236 			hs.hosts_unreachable++;
237 
238 		if (!host->GetLastCheckResult())
239 			hs.hosts_pending++;
240 
241 		if (host->IsFlapping())
242 			hs.hosts_flapping++;
243 		if (host->IsInDowntime())
244 			hs.hosts_in_downtime++;
245 		if (host->IsAcknowledged())
246 			hs.hosts_acknowledged++;
247 
248 		if (host->GetHandled())
249 			hs.hosts_handled++;
250 		if (host->GetProblem())
251 			hs.hosts_problem++;
252 	}
253 
254 	return hs;
255 }
256 
257 /*
258  * 'perfdata' must be a flat dictionary with double values
259  * 'status' dictionary can contain multiple levels of dictionaries
260  */
GetFeatureStats()261 std::pair<Dictionary::Ptr, Array::Ptr> CIB::GetFeatureStats()
262 {
263 	Dictionary::Ptr status = new Dictionary();
264 	Array::Ptr perfdata = new Array();
265 
266 	Namespace::Ptr statsFunctions = ScriptGlobal::Get("StatsFunctions", &Empty);
267 
268 	if (statsFunctions) {
269 		ObjectLock olock(statsFunctions);
270 
271 		for (const Namespace::Pair& kv : statsFunctions)
272 			static_cast<Function::Ptr>(kv.second->Get())->Invoke({ status, perfdata });
273 	}
274 
275 	return std::make_pair(status, perfdata);
276 }
277 
/*
 * Registers CIB::StatsFunc as the stats function named "CIB" -- presumably
 * under the global "StatsFunctions" namespace that CIB::GetFeatureStats()
 * iterates; confirm against the macro definition in base/statsfunction.hpp.
 */
REGISTER_STATSFUNCTION(CIB, &CIB::StatsFunc);
279 
StatsFunc(const Dictionary::Ptr & status,const Array::Ptr & perfdata)280 void CIB::StatsFunc(const Dictionary::Ptr& status, const Array::Ptr& perfdata) {
281 	double interval = Utility::GetTime() - Application::GetStartTime();
282 
283 	if (interval > 60)
284 		interval = 60;
285 
286 	status->Set("active_host_checks", GetActiveHostChecksStatistics(interval) / interval);
287 	status->Set("passive_host_checks", GetPassiveHostChecksStatistics(interval) / interval);
288 	status->Set("active_host_checks_1min", GetActiveHostChecksStatistics(60));
289 	status->Set("passive_host_checks_1min", GetPassiveHostChecksStatistics(60));
290 	status->Set("active_host_checks_5min", GetActiveHostChecksStatistics(60 * 5));
291 	status->Set("passive_host_checks_5min", GetPassiveHostChecksStatistics(60 * 5));
292 	status->Set("active_host_checks_15min", GetActiveHostChecksStatistics(60 * 15));
293 	status->Set("passive_host_checks_15min", GetPassiveHostChecksStatistics(60 * 15));
294 
295 	status->Set("active_service_checks", GetActiveServiceChecksStatistics(interval) / interval);
296 	status->Set("passive_service_checks", GetPassiveServiceChecksStatistics(interval) / interval);
297 	status->Set("active_service_checks_1min", GetActiveServiceChecksStatistics(60));
298 	status->Set("passive_service_checks_1min", GetPassiveServiceChecksStatistics(60));
299 	status->Set("active_service_checks_5min", GetActiveServiceChecksStatistics(60 * 5));
300 	status->Set("passive_service_checks_5min", GetPassiveServiceChecksStatistics(60 * 5));
301 	status->Set("active_service_checks_15min", GetActiveServiceChecksStatistics(60 * 15));
302 	status->Set("passive_service_checks_15min", GetPassiveServiceChecksStatistics(60 * 15));
303 
304 	// Checker related stats
305 	status->Set("remote_check_queue", ClusterEvents::GetCheckRequestQueueSize());
306 	status->Set("current_pending_callbacks", Application::GetTP().GetPending());
307 	status->Set("current_concurrent_checks", Checkable::CurrentConcurrentChecks.load());
308 
309 	CheckableCheckStatistics scs = CalculateServiceCheckStats();
310 
311 	status->Set("min_latency", scs.min_latency);
312 	status->Set("max_latency", scs.max_latency);
313 	status->Set("avg_latency", scs.avg_latency);
314 	status->Set("min_execution_time", scs.min_execution_time);
315 	status->Set("max_execution_time", scs.max_execution_time);
316 	status->Set("avg_execution_time", scs.avg_execution_time);
317 
318 	ServiceStatistics ss = CalculateServiceStats();
319 
320 	status->Set("num_services_ok", ss.services_ok);
321 	status->Set("num_services_warning", ss.services_warning);
322 	status->Set("num_services_critical", ss.services_critical);
323 	status->Set("num_services_unknown", ss.services_unknown);
324 	status->Set("num_services_pending", ss.services_pending);
325 	status->Set("num_services_unreachable", ss.services_unreachable);
326 	status->Set("num_services_flapping", ss.services_flapping);
327 	status->Set("num_services_in_downtime", ss.services_in_downtime);
328 	status->Set("num_services_acknowledged", ss.services_acknowledged);
329 	status->Set("num_services_handled", ss.services_handled);
330 	status->Set("num_services_problem", ss.services_problem);
331 
332 	double uptime = Application::GetUptime();
333 	status->Set("uptime", uptime);
334 
335 	HostStatistics hs = CalculateHostStats();
336 
337 	status->Set("num_hosts_up", hs.hosts_up);
338 	status->Set("num_hosts_down", hs.hosts_down);
339 	status->Set("num_hosts_pending", hs.hosts_pending);
340 	status->Set("num_hosts_unreachable", hs.hosts_unreachable);
341 	status->Set("num_hosts_flapping", hs.hosts_flapping);
342 	status->Set("num_hosts_in_downtime", hs.hosts_in_downtime);
343 	status->Set("num_hosts_acknowledged", hs.hosts_acknowledged);
344 	status->Set("num_hosts_handled", hs.hosts_handled);
345 	status->Set("num_hosts_problem", hs.hosts_problem);
346 }
347