1 /* Copyright (c) 2005-2018 Dovecot authors, see the included COPYING file */
2 
3 #include "common.h"
4 #include "array.h"
5 #include "ioloop.h"
6 #include "hash.h"
7 #include "str.h"
8 #include "safe-mkstemp.h"
9 #include "time-util.h"
10 #include "sleep.h"
11 #include "master-client.h"
12 #include "service.h"
13 #include "service-process.h"
14 #include "service-process-notify.h"
15 #include "service-anvil.h"
16 #include "service-log.h"
17 #include "service-monitor.h"
18 
19 #include <unistd.h>
20 #include <sys/wait.h>
21 #include <syslog.h>
22 #include <signal.h>
23 
/* Minimum number of seconds between two "client connections are being
   dropped" warnings logged for the same service. */
#define SERVICE_DROP_WARN_INTERVAL_SECS 1
/* How long a service waits with its listeners stopped before the pending
   connections are accepted and dropped. */
#define SERVICE_DROP_TIMEOUT_MSECS (10 * 1000)
/* Delay before the drop warning is logged for worker services. */
#define SERVICE_LOG_DROP_WARNING_DELAY_MSECS 500
/* Upper bound for how long services_monitor_wait() keeps polling. */
#define MAX_DIE_WAIT_MSECS 5000
/* A service that has never exited successfully is throttled once it fails
   more than this many times within the same second. */
#define SERVICE_MAX_EXIT_FAILURES_IN_SEC 10
/* Maximum number of processes created by one prefork timeout run. */
#define SERVICE_PREFORK_MAX_AT_ONCE 10
30 
/* Forward declarations for static helpers that are called earlier in this
   file than they are defined. */
static void service_monitor_start_extra_avail(struct service *service);
static void service_status_more(struct service_process *process,
				const struct master_status *status);
static void service_monitor_listen_start_force(struct service *service);
35 
service_process_kill_idle(struct service_process * process)36 static void service_process_kill_idle(struct service_process *process)
37 {
38 	struct service *service = process->service;
39 	struct master_status status;
40 
41 	i_assert(process->available_count == service->client_limit);
42 
43 	if (service->process_avail <= service->set->process_min_avail) {
44 		/* we don't have any extra idling processes anymore. */
45 		timeout_remove(&process->to_idle);
46 	} else if (process->last_kill_sent > process->last_status_update+1) {
47 		service_error(service, "Process %s is ignoring idle SIGINT",
48 			      dec2str(process->pid));
49 
50 		/* assume this process is busy */
51 		i_zero(&status);
52 		service_status_more(process, &status);
53 		process->available_count = 0;
54 	} else {
55 		if (kill(process->pid, SIGINT) < 0 && errno != ESRCH) {
56 			service_error(service, "kill(%s, SIGINT) failed: %m",
57 				      dec2str(process->pid));
58 		}
59 		process->last_kill_sent = ioloop_time;
60 	}
61 }
62 
service_status_more(struct service_process * process,const struct master_status * status)63 static void service_status_more(struct service_process *process,
64 				const struct master_status *status)
65 {
66 	struct service *service = process->service;
67 
68 	process->total_count +=
69 		process->available_count - status->available_count;
70 	process->idle_start = 0;
71 
72 	timeout_remove(&process->to_idle);
73 
74 	if (status->available_count != 0)
75 		return;
76 
77 	/* process used up all of its clients */
78 	i_assert(service->process_avail > 0);
79 	service->process_avail--;
80 
81 	if (service->type == SERVICE_TYPE_LOGIN &&
82 	    service->process_avail == 0 &&
83 	    service->process_count == service->process_limit)
84 		service_login_notify(service, TRUE);
85 
86 	/* we may need to start more */
87 	service_monitor_start_extra_avail(service);
88 	service_monitor_listen_start(service);
89 }
90 
service_check_idle(struct service_process * process)91 static void service_check_idle(struct service_process *process)
92 {
93 	struct service *service = process->service;
94 
95 	if (process->available_count != service->client_limit)
96 		return;
97 	process->idle_start = ioloop_time;
98 	if (service->process_avail > service->set->process_min_avail &&
99 	    process->to_idle == NULL &&
100 	    service->idle_kill != UINT_MAX) {
101 		/* we have more processes than we really need.
102 		   add a bit of randomness so that we don't send the
103 		   signal to all of them at once */
104 		process->to_idle =
105 			timeout_add((service->idle_kill * 1000) +
106 				    i_rand_limit(100) * 10,
107 				    service_process_kill_idle,
108 				    process);
109 	}
110 }
111 
service_status_less(struct service_process * process)112 static void service_status_less(struct service_process *process)
113 {
114 	struct service *service = process->service;
115 
116 	/* some process got more connections - remove the delayed warning */
117 	timeout_remove(&service->to_drop_warning);
118 
119 	if (process->available_count == 0) {
120 		/* process can accept more clients again */
121 		if (service->process_avail++ == 0)
122 			service_monitor_listen_stop(service);
123 		i_assert(service->process_avail <= service->process_count);
124 	}
125 	if (service->type == SERVICE_TYPE_LOGIN)
126 		service_login_notify(service, FALSE);
127 }
128 
129 static void
service_status_input_one(struct service * service,const struct master_status * status)130 service_status_input_one(struct service *service,
131 			 const struct master_status *status)
132 {
133         struct service_process *process;
134 
135 	process = hash_table_lookup(service_pids, POINTER_CAST(status->pid));
136 	if (process == NULL) {
137 		/* we've probably wait()ed it away already. ignore */
138 		return;
139 	}
140 
141 	if (process->uid != status->uid || process->service != service) {
142 		/* a) Process was closed and another process was created with
143 		   the same PID, but we're still receiving status update from
144 		   the old process.
145 
146 		   b) Some process is trying to corrupt our internal state by
147 		   trying to pretend to be someone else. We could use stronger
148 		   randomness here, but the worst they can do is DoS and there
149 		   are already more serious problems if someone is able to do
150 		   this.. */
151 		service_error(service, "Ignoring invalid update from child %s "
152 			      "(UID=%u)", dec2str(status->pid), status->uid);
153 		return;
154 	}
155 	process->last_status_update = ioloop_time;
156 
157 	/* first status notification */
158 	timeout_remove(&process->to_status);
159 
160 	if (process->available_count != status->available_count) {
161 		if (process->available_count > status->available_count) {
162 			/* process started servicing some more clients */
163 			service_status_more(process, status);
164 		} else {
165 			/* process finished servicing some clients */
166 			service_status_less(process);
167 		}
168 		process->available_count = status->available_count;
169 	}
170 	service_check_idle(process);
171 }
172 
service_status_input(struct service * service)173 static void service_status_input(struct service *service)
174 {
175 	struct master_status status[1024/sizeof(struct master_status)];
176 	unsigned int i, count;
177 	ssize_t ret;
178 
179 	ret = read(service->status_fd[0], &status, sizeof(status));
180 	if (ret <= 0) {
181 		if (ret == 0)
182 			service_error(service, "read(status) failed: EOF");
183 		else if (errno != EAGAIN)
184 			service_error(service, "read(status) failed: %m");
185 		else
186 			return;
187 		service_monitor_stop(service);
188 		return;
189 	}
190 
191 	if ((ret % sizeof(struct master_status)) != 0) {
192 		service_error(service, "service sent partial status update "
193 			      "(%d bytes)", (int)ret);
194 		return;
195 	}
196 
197 	count = ret / sizeof(struct master_status);
198 	for (i = 0; i < count; i++)
199 		service_status_input_one(service, &status[i]);
200 }
201 
service_log_drop_warning(struct service * service)202 static void service_log_drop_warning(struct service *service)
203 {
204 	const char *limit_name;
205 	unsigned int limit;
206 
207 	if (service->last_drop_warning +
208 	    SERVICE_DROP_WARN_INTERVAL_SECS <= ioloop_time) {
209 		service->last_drop_warning = ioloop_time;
210 		if (service->process_limit > 1) {
211 			limit_name = "process_limit";
212 			limit = service->process_limit;
213 		} else if (service->set->service_count == 1) {
214 			i_assert(service->client_limit == 1);
215 			limit_name = "client_limit/service_count";
216 			limit = 1;
217 		} else {
218 			limit_name = "client_limit";
219 			limit = service->client_limit;
220 		}
221 		i_warning("service(%s): %s (%u) reached, "
222 			  "client connections are being dropped",
223 			  service->set->name, limit_name, limit);
224 	}
225 }
226 
service_monitor_throttle(struct service * service)227 static void service_monitor_throttle(struct service *service)
228 {
229 	if (service->to_throttle != NULL || service->list->destroying)
230 		return;
231 
232 	i_assert(service->throttle_msecs > 0);
233 
234 	service_error(service,
235 		      "command startup failed, throttling for %u.%03u secs",
236 		      service->throttle_msecs / 1000,
237 		      service->throttle_msecs % 1000);
238 	service_throttle(service, service->throttle_msecs);
239 	service->throttle_msecs *= 2;
240 	if (service->throttle_msecs >
241 	    SERVICE_STARTUP_FAILURE_THROTTLE_MAX_MSECS) {
242 		service->throttle_msecs =
243 			SERVICE_STARTUP_FAILURE_THROTTLE_MAX_MSECS;
244 	}
245 }
246 
service_drop_timeout(struct service * service)247 static void service_drop_timeout(struct service *service)
248 {
249 	struct service_listener *lp;
250 	int fd;
251 
252 	i_assert(service->process_avail == 0);
253 
254 	/* drop all pending connections */
255 	array_foreach_elem(&service->listeners, lp) {
256 		while ((fd = net_accept(lp->fd, NULL, NULL)) > 0)
257 			net_disconnect(fd);
258 	}
259 
260 	service_monitor_listen_start_force(service);
261 	service->listen_pending = TRUE;
262 }
263 
service_monitor_listen_pending(struct service * service)264 static void service_monitor_listen_pending(struct service *service)
265 {
266 	i_assert(service->process_avail == 0);
267 
268 	service_monitor_listen_stop(service);
269 	service->listen_pending = TRUE;
270 
271 	service->to_drop = timeout_add(SERVICE_DROP_TIMEOUT_MSECS,
272 				       service_drop_timeout, service);
273 }
274 
service_drop_connections(struct service_listener * l)275 static void service_drop_connections(struct service_listener *l)
276 {
277 	struct service *service = l->service;
278 	int fd;
279 
280 	if (service->type != SERVICE_TYPE_WORKER)
281 		service_log_drop_warning(service);
282 
283 	if (service->type == SERVICE_TYPE_LOGIN) {
284 		/* reached process limit, notify processes that they
285 		   need to start killing existing connections if they
286 		   reach connection limit */
287 		service_login_notify(service, TRUE);
288 
289 		service_monitor_listen_pending(service);
290 	} else if (!service->listen_pending) {
291 		/* maybe this is a temporary peak, stop for a while and
292 		   see if it goes away */
293 		service_monitor_listen_pending(service);
294 		if (service->to_drop_warning == NULL &&
295 		    service->type == SERVICE_TYPE_WORKER) {
296 			service->to_drop_warning =
297 				timeout_add_short(SERVICE_LOG_DROP_WARNING_DELAY_MSECS,
298 						  service_log_drop_warning, service);
299 		}
300 	} else {
301 		/* this has been happening for a while now. just accept and
302 		   close the connection, so it's clear that this is happening
303 		   because of the limit, rather than because the service
304 		   processes aren't answering fast enough */
305 		fd = net_accept(l->fd, NULL, NULL);
306 		if (fd > 0)
307 			net_disconnect(fd);
308 	}
309 }
310 
service_accept(struct service_listener * l)311 static void service_accept(struct service_listener *l)
312 {
313 	struct service *service = l->service;
314 
315 	i_assert(service->process_avail == 0);
316 
317 	if (service->process_count == service->process_limit) {
318 		/* we've reached our limits, new clients will have to
319 		   wait until there are more processes available */
320 		service_drop_connections(l);
321 		return;
322 	}
323 
324 	/* create a child process and let it accept() this connection */
325 	if (service_process_create(service) == NULL)
326 		service_monitor_throttle(service);
327 	else
328 		service_monitor_listen_stop(service);
329 }
330 
331 static bool
service_monitor_start_count(struct service * service,unsigned int limit)332 service_monitor_start_count(struct service *service, unsigned int limit)
333 {
334 	unsigned int i, count;
335 
336 	i_assert(service->set->process_min_avail >= service->process_avail);
337 
338 	count = service->set->process_min_avail - service->process_avail;
339 	if (service->process_count + count > service->process_limit)
340 		count = service->process_limit - service->process_count;
341 	if (count > limit)
342 		count = limit;
343 
344 	for (i = 0; i < count; i++) {
345 		if (service_process_create(service) == NULL) {
346 			service_monitor_throttle(service);
347 			break;
348 		}
349 	}
350 	if (i > 0) {
351 		/* we created some processes, they'll do the listening now */
352 		service_monitor_listen_stop(service);
353 	}
354 	return i >= limit;
355 }
356 
service_monitor_prefork_timeout(struct service * service)357 static void service_monitor_prefork_timeout(struct service *service)
358 {
359 	/* don't prefork more processes if other more important processes had
360 	   been forked while we were waiting for this timeout (= master seems
361 	   busy) */
362 	if (service->list->fork_counter != service->prefork_counter) {
363 		service->prefork_counter = service->list->fork_counter;
364 		return;
365 	}
366 	if (service->process_avail < service->set->process_min_avail) {
367 		if (service_monitor_start_count(service, SERVICE_PREFORK_MAX_AT_ONCE) &&
368 		    service->process_avail < service->set->process_min_avail) {
369 			/* All SERVICE_PREFORK_MAX_AT_ONCE were created, but
370 			   it still wasn't enough. Launch more in the next
371 			   timeout. */
372 			return;
373 		}
374 	}
375 	timeout_remove(&service->to_prefork);
376 }
377 
service_monitor_start_extra_avail(struct service * service)378 static void service_monitor_start_extra_avail(struct service *service)
379 {
380 	if (service->process_avail >= service->set->process_min_avail ||
381 	    service->process_count >= service->process_limit ||
382 	    service->list->destroying)
383 		return;
384 
385 	if (service->process_avail == 0) {
386 		/* quickly start one process now */
387 		if (!service_monitor_start_count(service, 1))
388 			return;
389 		if (service->process_avail >= service->set->process_min_avail)
390 			return;
391 	}
392 	if (service->to_prefork == NULL) {
393 		/* ioloop handles timeouts before fds (= SIGCHLD callback),
394 		   so let the first timeout handler call simply update the fork
395 		   counter and the second one check if we're busy or not. */
396 		service->to_prefork =
397 			timeout_add_short(0, service_monitor_prefork_timeout, service);
398 	}
399 }
400 
service_monitor_listen_start_force(struct service * service)401 static void service_monitor_listen_start_force(struct service *service)
402 {
403 	struct service_listener *l;
404 
405 	service->listening = TRUE;
406 	service->listen_pending = FALSE;
407 	timeout_remove(&service->to_drop);
408 	timeout_remove(&service->to_drop_warning);
409 
410 	array_foreach_elem(&service->listeners, l) {
411 		if (l->io == NULL && l->fd != -1)
412 			l->io = io_add(l->fd, IO_READ, service_accept, l);
413 	}
414 }
415 
service_monitor_listen_start(struct service * service)416 void service_monitor_listen_start(struct service *service)
417 {
418 	if (service->process_avail > 0 || service->to_throttle != NULL ||
419 	    (service->process_count == service->process_limit &&
420 	     service->listen_pending))
421 		return;
422 
423 	service_monitor_listen_start_force(service);
424 }
425 
service_monitor_listen_stop(struct service * service)426 void service_monitor_listen_stop(struct service *service)
427 {
428 	struct service_listener *l;
429 
430 	array_foreach_elem(&service->listeners, l)
431 		io_remove(&l->io);
432 	service->listening = FALSE;
433 	service->listen_pending = FALSE;
434 	timeout_remove(&service->to_drop);
435 	timeout_remove(&service->to_drop_warning);
436 }
437 
service_login_create_notify_fd(struct service * service)438 static int service_login_create_notify_fd(struct service *service)
439 {
440 	int fd, ret;
441 
442 	if (service->login_notify_fd != -1)
443 		return 0;
444 
445 	T_BEGIN {
446 		string_t *prefix = t_str_new(128);
447 		const char *path;
448 
449 		str_append(prefix, service->set->master_set->base_dir);
450 		str_append(prefix, "/login-master-notify");
451 
452 		fd = safe_mkstemp(prefix, 0600, (uid_t)-1, (gid_t)-1);
453 		path = str_c(prefix);
454 
455 		if (fd == -1) {
456 			service_error(service, "safe_mkstemp(%s) failed: %m",
457 				      path);
458 		} else if (unlink(path) < 0) {
459 			service_error(service, "unlink(%s) failed: %m", path);
460 		} else {
461 			fd_close_on_exec(fd, TRUE);
462 			service->login_notify_fd = fd;
463 		}
464 	} T_END;
465 
466 	ret = fd == -1 ? -1 : 0;
467 	if (fd != service->login_notify_fd)
468 		i_close_fd(&fd);
469 	return ret;
470 }
471 
services_monitor_start(struct service_list * service_list)472 void services_monitor_start(struct service_list *service_list)
473 {
474 	ARRAY(struct service *) listener_services;
475 	struct service *service;
476 
477 	if (services_log_init(service_list) < 0)
478 		return;
479 	service_anvil_monitor_start(service_list);
480 
481 	if (service_list->io_master == NULL &&
482 	    service_list->master_fd != -1) {
483 		service_list->io_master =
484 			io_add(service_list->master_fd, IO_READ,
485 			       master_client_connected, service_list);
486 	}
487 
488 	t_array_init(&listener_services, array_count(&service_list->services));
489 	array_foreach_elem(&service_list->services, service) {
490 		if (service->type == SERVICE_TYPE_LOGIN) {
491 			if (service_login_create_notify_fd(service) < 0)
492 				continue;
493 		}
494 		if (service->master_dead_pipe_fd[0] == -1) {
495 			if (pipe(service->master_dead_pipe_fd) < 0) {
496 				service_error(service, "pipe() failed: %m");
497 				continue;
498 			}
499 			fd_close_on_exec(service->master_dead_pipe_fd[0], TRUE);
500 			fd_close_on_exec(service->master_dead_pipe_fd[1], TRUE);
501 		}
502 		if (service->status_fd[0] == -1) {
503 			/* we haven't yet created status pipe */
504 			if (pipe(service->status_fd) < 0) {
505 				service_error(service, "pipe() failed: %m");
506 				continue;
507 			}
508 
509 			net_set_nonblock(service->status_fd[0], TRUE);
510 			fd_close_on_exec(service->status_fd[0], TRUE);
511 			net_set_nonblock(service->status_fd[1], TRUE);
512 			fd_close_on_exec(service->status_fd[1], TRUE);
513 		}
514 		if (service->io_status == NULL) {
515 			service->io_status =
516 				io_add(service->status_fd[0], IO_READ,
517 				       service_status_input, service);
518 		}
519 		service_monitor_listen_start(service);
520 		array_push_back(&listener_services, &service);
521 	}
522 
523 	/* create processes only after adding all listeners */
524 	array_foreach_elem(&listener_services, service)
525 		service_monitor_start_extra_avail(service);
526 
527 	if (service_list->log->status_fd[0] != -1) {
528 		if (service_process_create(service_list->log) != NULL)
529 			service_monitor_listen_stop(service_list->log);
530 	}
531 
532 	/* start up a process for startup-services */
533 	array_foreach_elem(&service_list->services, service) {
534 		if (service->type == SERVICE_TYPE_STARTUP &&
535 		    service->status_fd[0] != -1) {
536 			if (service_process_create(service) != NULL)
537 				service_monitor_listen_stop(service);
538 		}
539 	}
540 }
541 
service_monitor_close_dead_pipe(struct service * service)542 static void service_monitor_close_dead_pipe(struct service *service)
543 {
544 	if (service->master_dead_pipe_fd[0] != -1) {
545 		i_close_fd(&service->master_dead_pipe_fd[0]);
546 		i_close_fd(&service->master_dead_pipe_fd[1]);
547 	}
548 }
549 
service_monitor_stop(struct service * service)550 void service_monitor_stop(struct service *service)
551 {
552 	int i;
553 
554 	io_remove(&service->io_status);
555 
556 	if (service->status_fd[0] != -1 &&
557 	    service->type != SERVICE_TYPE_ANVIL) {
558 		for (i = 0; i < 2; i++) {
559 			if (close(service->status_fd[i]) < 0) {
560 				service_error(service,
561 					      "close(status fd) failed: %m");
562 			}
563 			service->status_fd[i] = -1;
564 		}
565 	}
566 	service_monitor_close_dead_pipe(service);
567 	if (service->login_notify_fd != -1) {
568 		if (close(service->login_notify_fd) < 0) {
569 			service_error(service,
570 				      "close(login notify fd) failed: %m");
571 		}
572 		service->login_notify_fd = -1;
573 	}
574 	timeout_remove(&service->to_login_notify);
575 	service_monitor_listen_stop(service);
576 
577 	timeout_remove(&service->to_throttle);
578 	timeout_remove(&service->to_prefork);
579 }
580 
service_monitor_stop_close(struct service * service)581 void service_monitor_stop_close(struct service *service)
582 {
583 	struct service_listener *l;
584 
585 	service_monitor_stop(service);
586 
587 	array_foreach_elem(&service->listeners, l)
588 		i_close_fd(&l->fd);
589 }
590 
services_monitor_wait(struct service_list * service_list)591 static void services_monitor_wait(struct service_list *service_list)
592 {
593 	struct service *service;
594 	struct timeval tv_start;
595 	bool finished;
596 
597 	io_loop_time_refresh();
598 	tv_start = ioloop_timeval;
599 
600 	for (;;) {
601 		finished = TRUE;
602 		services_monitor_reap_children();
603 		array_foreach_elem(&service_list->services, service) {
604 			if (service->status_fd[0] != -1)
605 				service_status_input(service);
606 			if (service->process_avail > 0)
607 				finished = FALSE;
608 		}
609 		io_loop_time_refresh();
610 		if (finished ||
611 		    timeval_diff_msecs(&ioloop_timeval, &tv_start) > MAX_DIE_WAIT_MSECS)
612 			break;
613 		i_sleep_msecs(100);
614 	}
615 }
616 
service_processes_close_listeners(struct service * service)617 static bool service_processes_close_listeners(struct service *service)
618 {
619 	struct service_process *process = service->processes;
620 	bool ret = FALSE;
621 
622 	for (; process != NULL; process = process->next) {
623 		if (kill(process->pid, SIGQUIT) == 0)
624 			ret = TRUE;
625 		else if (errno != ESRCH) {
626 			service_error(service, "kill(%s, SIGQUIT) failed: %m",
627 				      dec2str(process->pid));
628 		}
629 	}
630 	return ret;
631 }
632 
633 static bool
service_list_processes_close_listeners(struct service_list * service_list)634 service_list_processes_close_listeners(struct service_list *service_list)
635 {
636 	struct service *service;
637 	bool ret = FALSE;
638 
639 	array_foreach_elem(&service_list->services, service) {
640 		if (service_processes_close_listeners(service))
641 			ret = TRUE;
642 	}
643 	return ret;
644 }
645 
/* Wait for the child processes to finish or stop listening, then send
   SIGQUIT to the ones that remain. */
static void services_monitor_wait_and_kill(struct service_list *service_list)
{
	/* all the children have been notified that the master is dead. wait
	   for them to either die or to report that they no longer listen
	   for new connections. */
	services_monitor_wait(service_list);

	/* The wait may have ended early because every process_avail==0, but
	   some processes can still have the listener socket open (just not
	   actively listened to). Those sockets have to be closed before we
	   exit, so that a restart won't fail. That's done by sending SIGQUIT
	   to all the remaining child processes. lib-master handles it by
	   closing the listener immediately in the signal handler itself. */
	if (!service_list_processes_close_listeners(service_list))
		return;

	/* SIGQUITs were sent. give the processes a moment to handle them
	   before we quit. */
	i_sleep_msecs(1000);
}
666 
/* Stop monitoring every service in the list. If wait is TRUE, the child
   processes are first waited for and then signalled (see
   services_monitor_wait_and_kill()). */
void services_monitor_stop(struct service_list *service_list, bool wait)
{
	struct service *service;

	/* the dead-pipes are closed first, before any waiting is done */
	array_foreach_elem(&service_list->services, service) {
		service_monitor_close_dead_pipe(service);
	}

	if (wait)
		services_monitor_wait_and_kill(service_list);

	io_remove(&service_list->io_master);

	array_foreach_elem(&service_list->services, service) {
		service_monitor_stop(service);
	}

	services_log_deinit(service_list);
}
684 
685 static bool
service_process_failure(struct service_process * process,int status)686 service_process_failure(struct service_process *process, int status)
687 {
688 	struct service *service = process->service;
689 	bool throttle;
690 
691 	service_process_log_status_error(process, status);
692 	throttle = process->to_status != NULL;
693 	if (!throttle && !service->have_successful_exits) {
694 		/* this service has seen no successful exits yet.
695 		   try to avoid failure storms by throttling the service if it
696 		   only keeps failing rapidly. this is no longer done after
697 		   one success to avoid intentional DoSing, in case attacker
698 		   finds a way to quickly crash his own session. */
699 		if (service->exit_failure_last != ioloop_time) {
700 			service->exit_failure_last = ioloop_time;
701 			service->exit_failures_in_sec = 0;
702 		}
703 		if (++service->exit_failures_in_sec > SERVICE_MAX_EXIT_FAILURES_IN_SEC)
704 			throttle = TRUE;
705 	}
706 	service_process_notify_add(service_anvil_global->kills, process);
707 	return throttle;
708 }
709 
services_monitor_reap_children(void)710 void services_monitor_reap_children(void)
711 {
712 	struct service_process *process;
713 	struct service *service;
714 	pid_t pid;
715 	int status;
716 	bool service_stopped, throttle;
717 
718 	while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
719 		process = hash_table_lookup(service_pids, POINTER_CAST(pid));
720 		if (process == NULL) {
721 			i_error("waitpid() returned unknown PID %s",
722 				dec2str(pid));
723 			continue;
724 		}
725 
726 		service = process->service;
727 		if (status == 0) {
728 			/* success - one success resets all failures */
729 			service->have_successful_exits = TRUE;
730 			service->exit_failures_in_sec = 0;
731 			service->throttle_msecs =
732 				SERVICE_STARTUP_FAILURE_THROTTLE_MIN_MSECS;
733 			throttle = FALSE;
734 		} else {
735 			throttle = service_process_failure(process, status);
736 		}
737 		if (service->type == SERVICE_TYPE_ANVIL)
738 			service_anvil_process_destroyed(process);
739 
740 		/* if we're reloading, we may get here with a service list
741 		   that's going to be destroyed after this process is
742 		   destroyed. keep the list referenced until we're done. */
743 		service_list_ref(service->list);
744 		service_process_destroy(process);
745 
746 		if (throttle)
747 			service_monitor_throttle(service);
748 		service_stopped = service->status_fd[0] == -1;
749 		if (!service_stopped && !service->list->destroying) {
750 			service_monitor_start_extra_avail(service);
751 			/* if there are no longer listening processes,
752 			   start listening for more */
753 			if (service->to_throttle != NULL) {
754 				/* throttling */
755 			} else if (service == service->list->log &&
756 				   service->process_count == 0) {
757 				/* log service must always be running */
758 				if (service_process_create(service) == NULL)
759 					service_monitor_throttle(service);
760 			} else {
761 				service_monitor_listen_start(service);
762 			}
763 		}
764 		service_list_unref(service->list);
765 	}
766 }
767