1 /* Icinga 2 | (c) 2012 Icinga GmbH | GPLv2+ */
2 
3 #include "icinga/checkable.hpp"
4 #include "icinga/service.hpp"
5 #include "icinga/host.hpp"
6 #include "icinga/checkcommand.hpp"
7 #include "icinga/icingaapplication.hpp"
8 #include "icinga/cib.hpp"
9 #include "icinga/clusterevents.hpp"
10 #include "remote/messageorigin.hpp"
11 #include "remote/apilistener.hpp"
12 #include "base/objectlock.hpp"
13 #include "base/logger.hpp"
14 #include "base/convert.hpp"
15 #include "base/utility.hpp"
16 #include "base/context.hpp"
17 
18 using namespace icinga;
19 
/* Emitted by ProcessCheckResult() after a new check result has been stored for a checkable. */
boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, const MessageOrigin::Ptr&)> Checkable::OnNewCheckResult;
/* Emitted by ProcessCheckResult() for hard and soft state changes; the StateType argument tells which one. */
boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, StateType, const MessageOrigin::Ptr&)> Checkable::OnStateChange;
/* Emitted by ProcessCheckResult() for checkables that have children; the set argument carries those children. */
boost::signals2::signal<void (const Checkable::Ptr&, const CheckResult::Ptr&, std::set<Checkable::Ptr>, const MessageOrigin::Ptr&)> Checkable::OnReachabilityChanged;
/* Emitted by ProcessCheckResult() to request notifications of the given NotificationType. */
boost::signals2::signal<void (const Checkable::Ptr&, NotificationType, const CheckResult::Ptr&, const String&, const String&, const MessageOrigin::Ptr&)> Checkable::OnNotificationsRequested;
/* NOTE(review): not emitted in this part of the file - presumably fired when the next check time changes; confirm. */
boost::signals2::signal<void (const Checkable::Ptr&)> Checkable::OnNextCheckUpdated;

/* NOTE(review): not referenced in this part of the file - presumably counts currently executing checks; confirm. */
Atomic<uint_fast64_t> Checkable::CurrentConcurrentChecks (0);

/* Guards m_PendingChecks. */
std::mutex Checkable::m_StatsMutex;
/* Pending check counter, modified by Increase/DecreasePendingChecks() and AquirePendingCheckSlot(). */
int Checkable::m_PendingChecks = 0;
/* Notified by DecreasePendingChecks(); AquirePendingCheckSlot() waits on it until the counter is below its limit. */
std::condition_variable Checkable::m_PendingChecksCV;
31 
GetCheckCommand() const32 CheckCommand::Ptr Checkable::GetCheckCommand() const
33 {
34 	return dynamic_pointer_cast<CheckCommand>(NavigateCheckCommandRaw());
35 }
36 
GetCheckPeriod() const37 TimePeriod::Ptr Checkable::GetCheckPeriod() const
38 {
39 	return TimePeriod::GetByName(GetCheckPeriodRaw());
40 }
41 
SetSchedulingOffset(long offset)42 void Checkable::SetSchedulingOffset(long offset)
43 {
44 	m_SchedulingOffset = offset;
45 }
46 
GetSchedulingOffset()47 long Checkable::GetSchedulingOffset()
48 {
49 	return m_SchedulingOffset;
50 }
51 
UpdateNextCheck(const MessageOrigin::Ptr & origin)52 void Checkable::UpdateNextCheck(const MessageOrigin::Ptr& origin)
53 {
54 	double interval;
55 
56 	if (GetStateType() == StateTypeSoft && GetLastCheckResult() != nullptr)
57 		interval = GetRetryInterval();
58 	else
59 		interval = GetCheckInterval();
60 
61 	double now = Utility::GetTime();
62 	double adj = 0;
63 
64 	if (interval > 1)
65 		adj = fmod(now * 100 + GetSchedulingOffset(), interval * 100) / 100.0;
66 
67 	if (adj != 0.0)
68 		adj = std::min(0.5 + fmod(GetSchedulingOffset(), interval * 5) / 100.0, adj);
69 
70 	double nextCheck = now - adj + interval;
71 	double lastCheck = GetLastCheck();
72 
73 	Log(LogDebug, "Checkable")
74 		<< "Update checkable '" << GetName() << "' with check interval '" << GetCheckInterval()
75 		<< "' from last check time at " << Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", (lastCheck < 0 ? 0 : lastCheck))
76 		<< " (" << GetLastCheck() << ") to next check time at " << Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", nextCheck) << " (" << nextCheck << ").";
77 
78 	SetNextCheck(nextCheck, false, origin);
79 }
80 
HasBeenChecked() const81 bool Checkable::HasBeenChecked() const
82 {
83 	return GetLastCheckResult() != nullptr;
84 }
85 
GetLastCheck() const86 double Checkable::GetLastCheck() const
87 {
88 	CheckResult::Ptr cr = GetLastCheckResult();
89 	double schedule_end = -1;
90 
91 	if (cr)
92 		schedule_end = cr->GetScheduleEnd();
93 
94 	return schedule_end;
95 }
96 
/**
 * Processes a check result for this checkable: fills in missing timestamps and
 * sources, recalculates state, state type and check attempt, stores the result,
 * reschedules the next check and emits the resulting signals and notification requests.
 *
 * Results of agent checks (command endpoint set and "agent_check" extension present)
 * are only sent back to the command endpoint and are not processed any further here.
 *
 * @param cr The check result. If it is null, only the "check running" flag is cleared.
 * @param origin The origin of the check result; null is treated like a local origin
 *               when the scheduling/check source is filled in.
 */
void Checkable::ProcessCheckResult(const CheckResult::Ptr& cr, const MessageOrigin::Ptr& origin)
{
	/* Whatever happens below, the check is no longer running. */
	{
		ObjectLock olock(this);
		m_CheckRunning = false;
	}

	if (!cr)
		return;

	double now = Utility::GetTime();

	/* Timestamps which are still 0 default to the current time. */
	if (cr->GetScheduleStart() == 0)
		cr->SetScheduleStart(now);

	if (cr->GetScheduleEnd() == 0)
		cr->SetScheduleEnd(now);

	if (cr->GetExecutionStart() == 0)
		cr->SetExecutionStart(now);

	if (cr->GetExecutionEnd() == 0)
		cr->SetExecutionEnd(now);

	if (!origin || origin->IsLocal())
		cr->SetSchedulingSource(IcingaApplication::GetInstance()->GetNodeName());

	Endpoint::Ptr command_endpoint = GetCommandEndpoint();

	if (cr->GetCheckSource().IsEmpty()) {
		if ((!origin || origin->IsLocal()))
			cr->SetCheckSource(IcingaApplication::GetInstance()->GetNodeName());

		/* override check source if command_endpoint was defined */
		if (command_endpoint && !GetExtension("agent_check"))
			cr->SetCheckSource(command_endpoint->GetName());
	}

	/* agent checks go through the api */
	if (command_endpoint && GetExtension("agent_check")) {
		ApiListener::Ptr listener = ApiListener::GetInstance();

		if (listener) {
			/* send message back to its origin */
			Dictionary::Ptr message = ClusterEvents::MakeCheckResultMessage(this, cr);
			listener->SyncSendMessage(command_endpoint, message);
		}

		return;

	}

	if (!IsActive())
		return;

	/* Reachability is determined before the object lock is taken. */
	bool reachable = IsReachable();
	bool notification_reachable = IsReachable(DependencyNotification);

	ObjectLock olock(this);

	/* Snapshot of the values before this check result is applied. */
	CheckResult::Ptr old_cr = GetLastCheckResult();
	ServiceState old_state = GetStateRaw();
	StateType old_stateType = GetStateType();
	long old_attempt = GetCheckAttempt();
	bool recovery = false;

	/* When we already have a check result (not after a fresh start),
	 * refuse to accept older check results, but allow overriding
	 * a current check result whose timestamp lies in the future.
	 */
	if (old_cr) {
		double currentCRTimestamp = old_cr->GetExecutionStart();
		double newCRTimestamp = cr->GetExecutionStart();

		/* Our current timestamp may be from the future (wrong server time adjusted again). Allow overrides here. */
		if (currentCRTimestamp > now) {
			/* our current CR is from the future, let the new CR override it. */
			Log(LogDebug, "Checkable")
				<< std::fixed << std::setprecision(6) << "Processing check result for checkable '" << GetName() << "' from "
				<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", newCRTimestamp) << " (" << newCRTimestamp
				<< "). Overriding since ours is from the future at "
				<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", currentCRTimestamp) << " (" << currentCRTimestamp << ").";
		} else {
			/* Current timestamp is from the past, but the new timestamp is even more in the past. Skip it. */
			if (newCRTimestamp < currentCRTimestamp) {
				Log(LogDebug, "Checkable")
					<< std::fixed << std::setprecision(6) << "Skipping check result for checkable '" << GetName() << "' from "
					<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", newCRTimestamp) << " (" << newCRTimestamp
					<< "). It is in the past compared to ours at "
					<< Utility::FormatDateTime("%Y-%m-%d %H:%M:%S %z", currentCRTimestamp) << " (" << currentCRTimestamp << ").";
				return;
			}
		}
	}

	/* The ExecuteCheck function already sets the old state, but we need to do it again
	 * in case this was a passive check result. */
	SetLastStateRaw(old_state);
	SetLastStateType(old_stateType);
	SetLastReachable(reachable);

	Host::Ptr host;
	Service::Ptr service;
	tie(host, service) = GetHostService(this);

	/* A checkable without a service part is treated as a host. */
	CheckableType checkableType = CheckableHost;
	if (service)
		checkableType = CheckableService;

	long attempt = 1;

	std::set<Checkable::Ptr> children = GetChildren();

	if (IsStateOK(cr->GetState())) {
		SetStateType(StateTypeHard); // NOT-OK -> HARD OK

		if (!IsStateOK(old_state))
			recovery = true;

		ResetNotificationNumbers();
		SaveLastState(ServiceOK, cr->GetExecutionEnd());

		/* update reachability for child objects in OK state */
		if (!children.empty())
			OnReachabilityChanged(this, cr, children, origin);
	} else {
		/* OK -> NOT-OK change, first SOFT state. Reset attempt counter. */
		if (IsStateOK(old_state)) {
			SetStateType(StateTypeSoft);
			attempt = 1;
		}

		/* SOFT state change, increase attempt counter. */
		if (old_stateType == StateTypeSoft && !IsStateOK(old_state)) {
			SetStateType(StateTypeSoft);
			attempt = old_attempt + 1;
		}

		/* HARD state change (e.g. previously 2/3 and this next attempt). Reset attempt counter. */
		if (attempt >= GetMaxCheckAttempts()) {
			SetStateType(StateTypeHard);
			attempt = 1;
		}

		/* Always true inside this branch: the enclosing else is only entered for NOT-OK results. */
		if (!IsStateOK(cr->GetState())) {
			SaveLastState(cr->GetState(), cr->GetExecutionEnd());
		}

		/* update reachability for child objects in NOT-OK state */
		if (!children.empty())
			OnReachabilityChanged(this, cr, children, origin);
	}

	/* On recovery, pull forward the next check of children which are in a problem state and
	 * have active checks enabled to a random point within the next 60 seconds, unless they
	 * are already scheduled earlier than that. */
	if (recovery) {
		for (auto& child : children) {
			if (child->GetProblem() && child->GetEnableActiveChecks()) {
				auto nextCheck (now + Utility::Random() % 60);

				ObjectLock oLock (child);

				if (nextCheck < child->GetNextCheck()) {
					child->SetNextCheck(nextCheck);
				}
			}
		}
	}

	if (!reachable)
		SetLastStateUnreachable(cr->GetExecutionEnd());

	SetCheckAttempt(attempt);

	ServiceState new_state = cr->GetState();
	SetStateRaw(new_state);

	bool stateChange;

	/* Exception on state change calculation for hosts: compare the calculated host states instead of the raw states. */
	if (checkableType == CheckableService)
		stateChange = (old_state != new_state);
	else
		stateChange = (Host::CalculateState(old_state) != Host::CalculateState(new_state));

	/* Store the current last state change for the next iteration. */
	SetPreviousStateChange(GetLastStateChange());

	if (stateChange) {
		SetLastStateChange(cr->GetExecutionEnd());

		/* remove acknowledgements: normal ones on every state change, sticky ones only when the new state is OK */
		if (GetAcknowledgement() == AcknowledgementNormal ||
			(GetAcknowledgement() == AcknowledgementSticky && IsStateOK(new_state))) {
			ClearAcknowledgement("");
		}

		/* reschedule direct parents */
		for (const Checkable::Ptr& parent : GetParents()) {
			if (parent.get() == this)
				continue;

			if (!parent->GetEnableActiveChecks())
				continue;

			/* Only check the parent right away if its next check is at least one retry interval away. */
			if (parent->GetNextCheck() >= now + parent->GetRetryInterval()) {
				ObjectLock olock(parent);
				parent->SetNextCheck(now);
			}
		}
	}

	bool remove_acknowledgement_comments = false;

	if (GetAcknowledgement() == AcknowledgementNone)
		remove_acknowledgement_comments = true;

	/* A hard change is either a SOFT -> HARD transition or a state change between two HARD states. */
	bool hardChange = (GetStateType() == StateTypeHard && old_stateType == StateTypeSoft);

	if (stateChange && old_stateType == StateTypeHard && GetStateType() == StateTypeHard)
		hardChange = true;

	bool is_volatile = GetVolatile();

	if (hardChange || is_volatile) {
		SetLastHardStateRaw(new_state);
		SetLastHardStateChange(cr->GetExecutionEnd());
		/* Decimal-encoded history: the old value divided by 100 plus the new state multiplied by 100. */
		SetLastHardStatesRaw(GetLastHardStatesRaw() / 100u + new_state * 100u);
	}

	if (stateChange) {
		/* Same decimal encoding as used for the hard states above. */
		SetLastSoftStatesRaw(GetLastSoftStatesRaw() / 100u + new_state * 100u);
	}

	if (!IsStateOK(new_state))
		TriggerDowntimes();

	/* statistics for external tools */
	Checkable::UpdateStatistics(cr, checkableType);

	bool in_downtime = IsInDowntime();

	bool send_notification = false;
	bool suppress_notification = !notification_reachable || in_downtime || IsAcknowledged();

	/* Send notifications when a hard state change occurred. */
	if (hardChange && !(old_stateType == StateTypeSoft && IsStateOK(new_state)))
		send_notification = true;
	/* Or if the checkable is volatile and in a HARD state. */
	else if (is_volatile && GetStateType() == StateTypeHard)
		send_notification = true;

	if (IsStateOK(old_state) && old_stateType == StateTypeSoft)
		send_notification = false; /* Don't send notifications for SOFT-OK -> HARD-OK. */

	if (is_volatile && IsStateOK(old_state) && IsStateOK(new_state))
		send_notification = false; /* Don't send notifications for volatile OK -> OK changes. */

	/* The object lock is released while comments are removed and the vars of the check result are set up. */
	olock.Unlock();

	if (remove_acknowledgement_comments)
		RemoveCommentsByType(CommentAcknowledgement);

	Dictionary::Ptr vars_after = new Dictionary({
		{ "state", new_state },
		{ "state_type", GetStateType() },
		{ "attempt", GetCheckAttempt() },
		{ "reachable", reachable }
	});

	/* The "after" vars of the previous check result become the "before" vars of this one. */
	if (old_cr)
		cr->SetVarsBefore(old_cr->GetVarsAfter());

	cr->SetVarsAfter(vars_after);

	olock.Lock();

	if (service) {
		SetLastCheckResult(cr);
	} else {
		bool wasProblem = GetProblem();

		SetLastCheckResult(cr);

		/* For hosts whose problem status changed, inform all services of the host (without holding the object lock). */
		if (GetProblem() != wasProblem) {
			auto services = host->GetServices();
			olock.Unlock();
			for (auto& service : services) {
				Service::OnHostProblemChanged(service, cr, origin);
			}
			olock.Lock();
		}
	}

	bool was_flapping = IsFlapping();

	UpdateFlappingStatus(cr->GetState());

	bool is_flapping = IsFlapping();

	if (cr->GetActive()) {
		UpdateNextCheck(origin);
	} else {
		/* Reschedule the next check for external passive check results. The side effect of
		 * this is that for as long as we receive results for a service we
		 * won't execute any active checks. */
		double offset;
		double ttl = cr->GetTtl();

		/* A positive TTL of the check result takes precedence over the check interval. */
		if (ttl > 0)
			offset = ttl;
		else
			offset = GetCheckInterval();

		SetNextCheck(Utility::GetTime() + offset, false, origin);
	}

	/* Everything below (signals, logging, event handler, notifications) runs without this scope's object lock. */
	olock.Unlock();

#ifdef I2_DEBUG /* I2_DEBUG */
	Log(LogDebug, "Checkable")
		<< "Flapping: Checkable " << GetName()
		<< " was: " << was_flapping
		<< " is: " << is_flapping
		<< " threshold low: " << GetFlappingThresholdLow()
		<< " threshold high: " << GetFlappingThresholdHigh()
		<< "% current: " << GetFlappingCurrent() << "%.";
#endif /* I2_DEBUG */

	OnNewCheckResult(this, cr, origin);

	/* signal status updates to for example db_ido */
	OnStateChanged(this);

	/* Hosts are logged with their calculated host state, services with their service state. */
	String old_state_str = (service ? Service::StateToString(old_state) : Host::StateToString(Host::CalculateState(old_state)));
	String new_state_str = (service ? Service::StateToString(new_state) : Host::StateToString(Host::CalculateState(new_state)));

	/* Whether a hard state change or a volatile state change except OK -> OK happened. */
	if (hardChange || (is_volatile && !(IsStateOK(old_state) && IsStateOK(new_state)))) {
		OnStateChange(this, cr, StateTypeHard, origin);
		Log(LogNotice, "Checkable")
			<< "State Change: Checkable '" << GetName() << "' hard state change from " << old_state_str << " to " << new_state_str << " detected." << (is_volatile ? " Checkable is volatile." : "");
	}
	/* Whether a state change happened or the state type is SOFT (must be logged too). */
	else if (stateChange || GetStateType() == StateTypeSoft) {
		OnStateChange(this, cr, StateTypeSoft, origin);
		Log(LogNotice, "Checkable")
			<< "State Change: Checkable '" << GetName() << "' soft state change from " << old_state_str << " to " << new_state_str << " detected.";
	}

	/* The event handler runs for soft states, hard changes, recoveries and volatile changes other than OK -> OK. */
	if (GetStateType() == StateTypeSoft || hardChange || recovery ||
		(is_volatile && !(IsStateOK(old_state) && IsStateOK(new_state))))
		ExecuteEventHandler();

	/* Bitmask of notification types which were held back below instead of being requested. */
	int suppressed_types = 0;

	/* Flapping start/end notifications */
	if (!was_flapping && is_flapping) {
		/* FlappingStart notifications happen on state changes, not in downtimes */
		if (!IsPaused()) {
			if (in_downtime) {
				suppressed_types |= NotificationFlappingStart;
			} else {
				OnNotificationsRequested(this, NotificationFlappingStart, cr, "", "", nullptr);
			}
		}

		Log(LogNotice, "Checkable")
			<< "Flapping Start: Checkable '" << GetName() << "' started flapping (Current flapping value "
			<< GetFlappingCurrent() << "% > high threshold " << GetFlappingThresholdHigh() << "%).";

		NotifyFlapping(origin);
	} else if (was_flapping && !is_flapping) {
		/* FlappingEnd notifications are independent from state changes, must not happen in downtime */
		if (!IsPaused()) {
			if (in_downtime) {
				suppressed_types |= NotificationFlappingEnd;
			} else {
				OnNotificationsRequested(this, NotificationFlappingEnd, cr, "", "", nullptr);
			}
		}

		Log(LogNotice, "Checkable")
			<< "Flapping Stop: Checkable '" << GetName() << "' stopped flapping (Current flapping value "
			<< GetFlappingCurrent() << "% < low threshold " << GetFlappingThresholdLow() << "%).";

		NotifyFlapping(origin);
	}

	/* Problem/recovery notifications are not requested while the checkable is flapping. */
	if (send_notification && !is_flapping) {
		if (!IsPaused()) {
			if (suppress_notification) {
				suppressed_types |= (recovery ? NotificationRecovery : NotificationProblem);
			} else {
				OnNotificationsRequested(this, recovery ? NotificationRecovery : NotificationProblem, cr, "", "", nullptr);
			}
		}
	}

	if (suppressed_types) {
		/* If some notifications were suppressed, but just because of e.g. a downtime,
		 * stash them into a notification types bitmask for maybe re-sending later.
		 */

		ObjectLock olock (this);
		int suppressed_types_before (GetSuppressedNotifications());
		int suppressed_types_after (suppressed_types_before | suppressed_types);

		for (int conflict : {NotificationProblem | NotificationRecovery, NotificationFlappingStart | NotificationFlappingEnd}) {
			/* E.g. problem and recovery notifications neutralize each other. */

			if ((suppressed_types_after & conflict) == conflict) {
				suppressed_types_after &= ~conflict;
			}
		}

		/* Only write the attribute if the bitmask actually changed. */
		if (suppressed_types_after != suppressed_types_before) {
			SetSuppressedNotifications(suppressed_types_after);
		}
	}
}
516 
ExecuteRemoteCheck(const Dictionary::Ptr & resolvedMacros)517 void Checkable::ExecuteRemoteCheck(const Dictionary::Ptr& resolvedMacros)
518 {
519 	CONTEXT("Executing remote check for object '" + GetName() + "'");
520 
521 	double scheduled_start = GetNextCheck();
522 	double before_check = Utility::GetTime();
523 
524 	CheckResult::Ptr cr = new CheckResult();
525 	cr->SetScheduleStart(scheduled_start);
526 	cr->SetExecutionStart(before_check);
527 
528 	GetCheckCommand()->Execute(this, cr, resolvedMacros, true);
529 }
530 
ExecuteCheck()531 void Checkable::ExecuteCheck()
532 {
533 	CONTEXT("Executing check for object '" + GetName() + "'");
534 
535 	/* keep track of scheduling info in case the check type doesn't provide its own information */
536 	double scheduled_start = GetNextCheck();
537 	double before_check = Utility::GetTime();
538 
539 	SetLastCheckStarted(Utility::GetTime());
540 
541 	/* This calls SetNextCheck() which updates the CheckerComponent's idle/pending
542 	 * queues and ensures that checks are not fired multiple times. ProcessCheckResult()
543 	 * is called too late. See #6421.
544 	 */
545 	UpdateNextCheck();
546 
547 	bool reachable = IsReachable();
548 
549 	{
550 		ObjectLock olock(this);
551 
552 		/* don't run another check if there is one pending */
553 		if (m_CheckRunning)
554 			return;
555 
556 		m_CheckRunning = true;
557 
558 		SetLastStateRaw(GetStateRaw());
559 		SetLastStateType(GetLastStateType());
560 		SetLastReachable(reachable);
561 	}
562 
563 	CheckResult::Ptr cr = new CheckResult();
564 
565 	cr->SetScheduleStart(scheduled_start);
566 	cr->SetExecutionStart(before_check);
567 
568 	Endpoint::Ptr endpoint = GetCommandEndpoint();
569 	bool local = !endpoint || endpoint == Endpoint::GetLocalEndpoint();
570 
571 	if (local) {
572 		GetCheckCommand()->Execute(this, cr, nullptr, false);
573 	} else {
574 		Dictionary::Ptr macros = new Dictionary();
575 		GetCheckCommand()->Execute(this, cr, macros, false);
576 
577 		if (endpoint->GetConnected()) {
578 			/* perform check on remote endpoint */
579 			Dictionary::Ptr message = new Dictionary();
580 			message->Set("jsonrpc", "2.0");
581 			message->Set("method", "event::ExecuteCommand");
582 
583 			Host::Ptr host;
584 			Service::Ptr service;
585 			tie(host, service) = GetHostService(this);
586 
587 			Dictionary::Ptr params = new Dictionary();
588 			message->Set("params", params);
589 			params->Set("command_type", "check_command");
590 			params->Set("command", GetCheckCommand()->GetName());
591 			params->Set("host", host->GetName());
592 
593 			if (service)
594 				params->Set("service", service->GetShortName());
595 
596 			/*
597 			 * If the host/service object specifies the 'check_timeout' attribute,
598 			 * forward this to the remote endpoint to limit the command execution time.
599 			 */
600 			if (!GetCheckTimeout().IsEmpty())
601 				params->Set("check_timeout", GetCheckTimeout());
602 
603 			params->Set("macros", macros);
604 
605 			ApiListener::Ptr listener = ApiListener::GetInstance();
606 
607 			if (listener)
608 				listener->SyncSendMessage(endpoint, message);
609 
610 			/* Re-schedule the check so we don't run it again until after we've received
611 			 * a check result from the remote instance. The check will be re-scheduled
612 			 * using the proper check interval once we've received a check result.
613 			 */
614 			SetNextCheck(Utility::GetTime() + GetCheckCommand()->GetTimeout() + 30);
615 
616 		/*
617 		 * Let the user know that there was a problem with the check if
618 		 * 1) The endpoint is not syncing (replay log, etc.)
619 		 * 2) Outside of the cold startup window (5min)
620 		 */
621 		} else if (!endpoint->GetSyncing() && Application::GetInstance()->GetStartTime() < Utility::GetTime() - 300) {
622 			/* fail to perform check on unconnected endpoint */
623 			cr->SetState(ServiceUnknown);
624 
625 			String output = "Remote Icinga instance '" + endpoint->GetName() + "' is not connected to ";
626 
627 			Endpoint::Ptr localEndpoint = Endpoint::GetLocalEndpoint();
628 
629 			if (localEndpoint)
630 				output += "'" + localEndpoint->GetName() + "'";
631 			else
632 				output += "this instance";
633 
634 			cr->SetOutput(output);
635 
636 			ProcessCheckResult(cr);
637 		}
638 
639 		{
640 			ObjectLock olock(this);
641 			m_CheckRunning = false;
642 		}
643 	}
644 }
645 
UpdateStatistics(const CheckResult::Ptr & cr,CheckableType type)646 void Checkable::UpdateStatistics(const CheckResult::Ptr& cr, CheckableType type)
647 {
648 	time_t ts = cr->GetScheduleEnd();
649 
650 	if (type == CheckableHost) {
651 		if (cr->GetActive())
652 			CIB::UpdateActiveHostChecksStatistics(ts, 1);
653 		else
654 			CIB::UpdatePassiveHostChecksStatistics(ts, 1);
655 	} else if (type == CheckableService) {
656 		if (cr->GetActive())
657 			CIB::UpdateActiveServiceChecksStatistics(ts, 1);
658 		else
659 			CIB::UpdatePassiveServiceChecksStatistics(ts, 1);
660 	} else {
661 		Log(LogWarning, "Checkable", "Unknown checkable type for statistic update.");
662 	}
663 }
664 
IncreasePendingChecks()665 void Checkable::IncreasePendingChecks()
666 {
667 	std::unique_lock<std::mutex> lock(m_StatsMutex);
668 	m_PendingChecks++;
669 }
670 
DecreasePendingChecks()671 void Checkable::DecreasePendingChecks()
672 {
673 	std::unique_lock<std::mutex> lock(m_StatsMutex);
674 	m_PendingChecks--;
675 	m_PendingChecksCV.notify_one();
676 }
677 
GetPendingChecks()678 int Checkable::GetPendingChecks()
679 {
680 	std::unique_lock<std::mutex> lock(m_StatsMutex);
681 	return m_PendingChecks;
682 }
683 
AquirePendingCheckSlot(int maxPendingChecks)684 void Checkable::AquirePendingCheckSlot(int maxPendingChecks)
685 {
686 	std::unique_lock<std::mutex> lock(m_StatsMutex);
687 	while (m_PendingChecks >= maxPendingChecks)
688 		m_PendingChecksCV.wait(lock);
689 
690 	m_PendingChecks++;
691 }
692