1 /* master.c -- IMAP master process to handle recovery, checkpointing, spawning
2  *
3  * Copyright (c) 1994-2008 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 #include <config.h>
44 
45 #include <stdio.h>
46 #include <stdint.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <time.h>
50 #include <sys/time.h>
51 #include <sys/types.h>
52 #include <sys/wait.h>
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #ifdef HAVE_SYS_RESOURCE_H
57 #include <sys/resource.h>
58 #endif
59 #include <fcntl.h>
60 #include <signal.h>
61 #include <sys/param.h>
62 #include <sys/stat.h>
63 #include <syslog.h>
64 #include <netdb.h>
65 #include <sys/socket.h>
66 #include <netinet/in.h>
67 #include <sys/un.h>
68 #include <arpa/inet.h>
69 #include <sysexits.h>
70 #include <errno.h>
71 #include <limits.h>
72 #include <math.h>
73 #include <inttypes.h>
74 
75 #ifndef PATH_MAX
76 #define PATH_MAX 4096
77 #endif
78 
79 #ifndef INADDR_NONE
80 #define INADDR_NONE 0xffffffff
81 #endif
82 
83 #ifndef INADDR_ANY
84 #define INADDR_ANY 0x00000000
85 #endif
86 
87 #if !defined(IPV6_V6ONLY) && defined(IPV6_BINDV6ONLY)
88 #define IPV6_V6ONLY     IPV6_BINDV6ONLY
89 #endif
90 
91 #include "masterconf.h"
92 
93 #include "master.h"
94 #include "service.h"
95 
96 #include "cyr_lock.h"
97 #include "retry.h"
98 #include "util.h"
99 #include "xmalloc.h"
100 #include "strarray.h"
101 
/* Sizing constants for the child-process table. */
enum {
    child_table_size = 10000,   /* number of hash buckets in ctable[] */
    child_table_inc = 100       /* NOTE(review): not referenced in the visible
                                 * part of this file */
};

/* Debug level; values above 2 enable extra LOG_DEBUG messages. */
static int verbose = 0;
/* Backlog handed to listen() for stream listeners. */
static int listen_queue_backlog = 32;
/* Descriptor that forked children close to release the pidfile lock. */
static int pidfd = -1;

/* NOTE(review): presumably non-zero once shutdown has begun; it is only
 * declared here, not used in the visible part of the file. */
static int in_shutdown = 0;

/* Path of the master configuration file; starts as the compiled-in default. */
const char *MASTER_CONFIG_FILENAME = DEFAULT_MASTER_CONFIG_FILENAME;
114 
/* Services[] index value meaning "not attached to any service". */
#define SERVICE_NONE (-1)
/* Upper bound on the number of service slots.  Parenthesized so the macro
 * behaves as a single value inside larger expressions. */
#define SERVICE_MAX  (INT_MAX-10)
/* Yields x, or the string "unknown" when x is NULL (for log messages). */
#define SERVICEPARAM(x) ((x) ? (x) : "unknown")

#define MAX_READY_FAILS              5
#define MAX_READY_FAIL_INTERVAL     10  /* 10 seconds */

#define FNAME_PROM_STATS_DIR        "/stats" /* keep in sync with prometheus.h */
#define FNAME_PROM_MASTER_REPORT    "master.txt"
124 
/* Table of service slots; grown in steps by service_add(). */
struct service *Services = NULL;
static int allocservices = 0;   /* slots currently allocated in Services[] */
int nservices = 0;              /* slots currently in use */

/*
 * One entry in the master's schedule.
 * NOTE(review): period/hour/min/periodic are not used in the visible part
 * of this file; their exact meaning should be confirmed where events are
 * created.
 */
struct event {
    char *name;             /* required; schedule_event() aborts without it */
    struct timeval mark;    /* time the event is due; the schedule is
                             * ordered by this value */
    time_t period;
    int hour;
    int min;
    int periodic;
    strarray_t *exec;       /* command to run, or NULL when the event only
                             * serves to wake the master up */
    struct event *next;
};
/* Head of the pending-event list, kept sorted by schedule_event(). */
static struct event *schedule = NULL;
140 
/* Lifecycle states the master tracks for each child process. */
enum sstate {
    SERVICE_STATE_UNKNOWN = 0,  /* duh */
    SERVICE_STATE_INIT    = 1,  /* Service forked - UNUSED */
    SERVICE_STATE_READY   = 2,  /* Service told us it is ready */
                                /* or it just forked and has not
                                 * talked to us yet */
    SERVICE_STATE_BUSY    = 3,  /* Service told us it is not ready */
    SERVICE_STATE_DEAD    = 4   /* We received a sigchld from this service */
};

/* Bookkeeping record for one child process, keyed by pid. */
struct centry {
    pid_t pid;
    enum sstate service_state;  /* SERVICE_STATE_* */
    time_t janitor_deadline;    /* cleanup deadline */
    int si;                     /* Services[] index */
    char *desc;                 /* human readable description for logging */
    struct timeval spawntime;   /* when the centry was allocated */
    time_t sighuptime;          /* when did we send a SIGHUP */
    struct centry *next;        /* next entry in the same ctable[] bucket */
};
/* Hash table of child entries: bucket is pid % child_table_size, with
 * collisions chained through centry.next. */
static struct centry *ctable[child_table_size];
162 
static int janitor_frequency = 1;       /* Janitor sweeps per second */
static int janitor_position;            /* Entry to begin at in next sweep */
static struct timeval janitor_mark;     /* Last time janitor did a sweep */

/* NOTE(review): the prom_* variables look like settings and state for a
 * periodic Prometheus report; they are only declared here, so confirm
 * their semantics at the point of use. */
static int prom_enabled = 0;
static int prom_frequency = 0;
static struct timeval prom_prev_report = { 0, 0 };
static char *prom_report_fname = NULL;

/* Forward declarations for functions that are called before they are
 * defined. */
#ifdef HAVE_SETRLIMIT
static void limit_fds(rlim_t);
#endif
static void schedule_event(struct event *a);
static void child_sighandler_setup(void);

#if HAVE_PSELECT
/* Signal mask that myselect() hands to pselect(). */
static sigset_t pselect_sigmask;
#endif
181 
/*
 * Wait for activity on the given descriptor sets.
 *
 * Thin wrapper: when HAVE_PSELECT is set it calls pselect() with the
 * file-level pselect_sigmask, otherwise plain select().  Arguments and
 * return value are those of select(); a NULL 'tout' means no timeout.
 */
static int myselect(int nfds, fd_set *rfds, fd_set *wfds,
                    fd_set *efds, struct timeval *tout)
{
#if HAVE_PSELECT
    /* pselect() closes the race between SIGCHLD arriving
     * and select() sleeping for up to 10 seconds. */
    struct timespec limit;

    if (!tout)
        return pselect(nfds, rfds, wfds, efds, NULL, &pselect_sigmask);

    /* pselect() takes nanoseconds where select() takes microseconds */
    limit.tv_sec = tout->tv_sec;
    limit.tv_nsec = tout->tv_usec * 1000;
    return pselect(nfds, rfds, wfds, efds, &limit, &pselect_sigmask);
#else
    return select(nfds, rfds, wfds, efds, tout);
#endif
}
200 
/*
 * Log 'msg' at LOG_CRIT, log that we are exiting, and terminate the
 * process with exit status 'code'.  Does not return.
 */
EXPORTED void fatal(const char *msg, int code)
{
    syslog(LOG_CRIT, "%s", msg);
    syslog(LOG_NOTICE, "exiting");
    exit(code);
}
207 
event_free(struct event * a)208 static void event_free(struct event *a)
209 {
210     if (a->exec) {
211         strarray_free(a->exec);
212         a->exec = NULL;
213     }
214     free(a->name);
215     free(a);
216 }
217 
get_daemon(char * path,size_t size,const strarray_t * cmd)218 static void get_daemon(char *path, size_t size, const strarray_t *cmd)
219 {
220     if (!size) return;
221     if (cmd->data[0][0] == '/') {
222         /* master lacks strlcpy, due to no libcyrus */
223         strncpy(path, cmd->data[0], size - 1);
224     }
225     else snprintf(path, size, "%s/%s", LIBEXEC_DIR, cmd->data[0]);
226     path[size-1] = '\0';
227 }
228 
get_prog(char * path,size_t size,const strarray_t * cmd)229 static void get_prog(char *path, size_t size, const strarray_t *cmd)
230 {
231     if (!size) return;
232     if (cmd->data[0][0] == '/') {
233         /* master lacks strlcpy, due to no libcyrus */
234         strncpy(path, cmd->data[0], size - 1);
235     }
236     else snprintf(path, size, "%s/%s", SBIN_DIR, cmd->data[0]);
237     path[size-1] = '\0';
238 }
239 
/*
 * Resolve cmd->data[0] to an executable path in 'path' (capacity 'size').
 *
 * The get_daemon() candidate is used if it exists on disk; otherwise the
 * get_prog() candidate is written.  If neither exists, 'path' is left
 * holding the get_prog() candidate and no error is reported.
 */
static void get_executable(char *path, size_t size, const strarray_t *cmd)
{
    struct stat sb;

    if (size == 0) return;

    get_daemon(path, size, cmd);
    if (stat(path, &sb) != 0) {
        get_prog(path, size, cmd);
        if (stat(path, &sb) != 0) {
            /* XXX - abort? */
        }
    }
}
251 
/*
 * Create the status pipe for a service.
 *
 * On return filedes[0] is the read end, switched to non-blocking and
 * marked close-on-exec, and filedes[1] is the write end.  Any failure
 * is reported through fatalf().
 */
static void get_statsock(int filedes[2])
{
    int flags;

    if (pipe(filedes) != 0)
        fatalf(1, "couldn't create status socket: %m");

    /* we don't want the master blocking on reads */
    flags = fcntl(filedes[0], F_GETFL, 0);
    if (flags == -1
        || fcntl(filedes[0], F_SETFL, flags | O_NONBLOCK) == -1)
        fatalf(1, "unable to set non-blocking: %m");

    /* we don't want the services to be able to read from it */
    flags = fcntl(filedes[0], F_GETFD, 0);
    if (flags == -1
        || fcntl(filedes[0], F_SETFD, flags | FD_CLOEXEC) == -1)
        fatalf(1, "unable to set close-on-exec: %m");
}
273 
cyrus_cap_bind(int socket,struct sockaddr * addr,socklen_t length)274 static int cyrus_cap_bind(int socket, struct sockaddr *addr, socklen_t length)
275 {
276     int r;
277 
278     set_caps(BEFORE_BIND, /*is_master*/1);
279     r = bind(socket, addr, length);
280     set_caps(AFTER_BIND, /*is_master*/1);
281 
282     return r;
283 }
284 
285 /* Return a new 'centry', by malloc'ing it. */
centry_alloc(void)286 static struct centry *centry_alloc(void)
287 {
288     struct centry *t;
289 
290     t = xzmalloc(sizeof(*t));
291     t->si = SERVICE_NONE;
292     gettimeofday(&t->spawntime, NULL);
293     t->sighuptime = (time_t)-1;
294 
295     return t;
296 }
297 
centry_set_name(struct centry * c,const char * type,const char * name,const char * path)298 static void centry_set_name(struct centry *c, const char *type,
299                             const char *name, const char *path)
300 {
301     free(c->desc);
302     if (name && path)
303         c->desc = strconcat("type:", type, " name:", name, " path:", path, NULL);
304     else
305         c->desc = strconcat("type:", type, NULL);
306 }
307 
centry_describe(const struct centry * c,pid_t pid)308 static char *centry_describe(const struct centry *c, pid_t pid)
309 {
310     struct buf desc = BUF_INITIALIZER;
311 
312     if (!c) {
313         buf_appendcstr(&desc, "unknown process");
314     }
315     else {
316         struct timeval now;
317         gettimeofday(&now, NULL);
318         buf_printf(&desc, "process %s age:%.3fs",
319                    c->desc, timesub(&c->spawntime, &now));
320     }
321     buf_printf(&desc, " pid:%d", (int)pid);
322     return buf_release(&desc);
323 }
324 
325 /* free a centry */
centry_free(struct centry * c)326 static void centry_free(struct centry *c)
327 {
328     free(c->desc);
329     free(c);
330 }
331 
332 /* add a centry to the global table of all
333  * centries, using the given pid as the key */
centry_add(struct centry * c,pid_t p)334 static void centry_add(struct centry *c, pid_t p)
335 {
336     c->pid = p;
337     c->next = ctable[p % child_table_size];
338     ctable[p % child_table_size] = c;
339 }
340 
341 /* find a centry in the global table, using the
342  * given pid as the key.  Returns NULL if not
343  * found. */
centry_find(pid_t p)344 static struct centry *centry_find(pid_t p)
345 {
346     struct centry *c;
347 
348     c = ctable[p % child_table_size];
349     while (c && c->pid != p)
350         c = c->next;
351     return c;
352 }
353 
centry_set_state(struct centry * c,enum sstate state)354 static void centry_set_state(struct centry *c, enum sstate state)
355 {
356     c->service_state = state;
357     if (state == SERVICE_STATE_DEAD)
358         c->janitor_deadline = time(NULL) + 2;
359 }
360 
/*
 * Split a "listen" parameter into host and port parts.
 *
 * Recognised shapes:
 *
 *   port                      -> port only
 *   host ':' port             -> host and port (split at the LAST ':')
 *   '[' address ']'           -> host only
 *   '[' address ']' ':' port  -> host and port
 *
 * where 'host' may be a hostname or an IPv4 address, and a bracketed
 * 'address' may be IPv4 or IPv6.
 *
 * NOTE(review): a string with no ':' and no leading '[' is always
 * returned whole as the port, so a bare IPv4 address without brackets
 * ends up in *portp rather than *hostp.
 *
 * Returns 0 on success with one or both of *hostp and *portp set to new
 * strings which the caller must free(); the one not present is NULL.
 * Returns -1 on a malformed value (missing ']', trailing ':' with no
 * port, or junk after ']'), with both outputs NULL.
 */
static int parse_inet_listen(const char *listen,
                             char **hostp, char **portp)
{
    const char *rbracket, *tail, *colon;
    size_t hostlen;

    *hostp = NULL;
    *portp = NULL;

    if (listen[0] != '[') {
        colon = strrchr(listen, ':');
        if (!colon) {
            /* no host specified */
            *portp = xstrdup(listen);
            return 0;
        }
        if (colon[1] == '\0')
            return -1;
        *hostp = xstrndup(listen, (colon - listen));
        *portp = xstrdup(colon + 1);
        return 0;
    }

    /* bracketed address: the host is whatever sits between '[' and the
     * last ']' */
    rbracket = strrchr(listen, ']');
    if (!rbracket)
        return -1;
    hostlen = rbracket - (listen + 1);
    tail = rbracket + 1;

    switch (*tail) {
    case ':':
        if (tail[1] == '\0')
            return -1;
        *hostp = xstrndup(listen + 1, hostlen);
        *portp = xstrdup(tail + 1);
        return 0;

    case '\0':
        /* no port specified */
        *hostp = xstrndup(listen + 1, hostlen);
        return 0;

    default:
        return -1;
    }
}
416 
verify_service_file(const strarray_t * filename)417 static int verify_service_file(const strarray_t *filename)
418 {
419     char path[PATH_MAX];
420     struct stat statbuf;
421 
422     get_executable(path, sizeof(path), filename);
423     if (stat(path, &statbuf)) return 0;
424     if (! S_ISREG(statbuf.st_mode)) return 0;
425     return statbuf.st_mode & S_IXUSR;
426 }
427 
service_forget_exec(struct service * s)428 static void service_forget_exec(struct service *s)
429 {
430     if (s->exec) {
431         /* Only free the service info on the primary */
432         if (s->associate == 0) {
433             strarray_free(s->exec);
434         }
435         s->exec = NULL;
436     }
437 }
438 
service_add(const struct service * proto)439 static struct service *service_add(const struct service *proto)
440 {
441     struct service *s;
442 
443     if (nservices == allocservices) {
444         if (allocservices > SERVICE_MAX - 5)
445             fatal("out of service structures, please restart", EX_UNAVAILABLE);
446         Services = xrealloc(Services,
447                            (allocservices+=5) * sizeof(struct service));
448     }
449     s = &Services[nservices++];
450 
451     if (proto)
452         memcpy(s, proto, sizeof(struct service));
453     else {
454         memset(s, 0, sizeof(struct service));
455         s->socket = -1;
456         s->stat[0] = -1;
457         s->stat[1] = -1;
458     }
459 
460     return s;
461 }
462 
service_create(struct service * s,int is_startup)463 static void service_create(struct service *s, int is_startup)
464 {
465     struct service service0, service;
466     struct addrinfo hints, *res0, *res;
467     int error, nsocket = 0;
468     struct sockaddr_un sunsock;
469     mode_t oldumask;
470     int on = 1;
471     int res0_is_local = 0;
472     int r;
473 
474     if (s->associate > 0)
475         return;                 /* service is already activated */
476 
477     if (!s->listen)
478         return;                 /* service is a daemon, no listener */
479 
480     if (!s->name)
481         fatal("Serious software bug found: service_create() called on unnamed service!",
482                 EX_SOFTWARE);
483 
484     if (s->listen[0] == '/') { /* unix socket */
485         if (strlen(s->listen) >= sizeof(sunsock.sun_path)) {
486             syslog(LOG_ERR, "invalid listen '%s' (too long), disabling %s",
487                    s->listen, s->name);
488             service_forget_exec(s);
489             return;
490         }
491         res0_is_local = 1;
492         res0 = (struct addrinfo *)xzmalloc(sizeof(struct addrinfo));
493         res0->ai_flags = AI_PASSIVE;
494         res0->ai_family = PF_UNIX;
495         if(!strcmp(s->proto, "tcp")) {
496             res0->ai_socktype = SOCK_STREAM;
497         } else {
498             /* udp */
499             res0->ai_socktype = SOCK_DGRAM;
500         }
501         res0->ai_addr = (struct sockaddr *)&sunsock;
502         res0->ai_addrlen = sizeof(sunsock.sun_family) + strlen(s->listen) + 1;
503 #ifdef SIN6_LEN
504         res0->ai_addrlen += sizeof(sunsock.sun_len);
505         sunsock.sun_len = res0->ai_addrlen;
506 #endif
507         sunsock.sun_family = AF_UNIX;
508 
509         int r = snprintf(sunsock.sun_path, sizeof(sunsock.sun_path), "%s", s->listen);
510         if (r < 0 || (size_t) r >= sizeof(sunsock.sun_path)) {
511             /* belt and suspenders */
512             fatal("Serious software bug found: "
513                   "over-long listen path not detected earlier!",
514                   EX_SOFTWARE);
515         }
516         unlink(s->listen);
517     } else { /* inet socket */
518         char *port;
519         char *listen_addr;
520 
521         memset(&hints, 0, sizeof(hints));
522         hints.ai_flags = AI_PASSIVE;
523         if (!strcmp(s->proto, "tcp")) {
524             hints.ai_family = PF_UNSPEC;
525             hints.ai_socktype = SOCK_STREAM;
526         } else if (!strcmp(s->proto, "tcp4")) {
527             hints.ai_family = PF_INET;
528             hints.ai_socktype = SOCK_STREAM;
529 #ifdef PF_INET6
530         } else if (!strcmp(s->proto, "tcp6")) {
531             hints.ai_family = PF_INET6;
532             hints.ai_socktype = SOCK_STREAM;
533 #endif
534         } else if (!strcmp(s->proto, "udp")) {
535             hints.ai_family = PF_UNSPEC;
536             hints.ai_socktype = SOCK_DGRAM;
537         } else if (!strcmp(s->proto, "udp4")) {
538             hints.ai_family = PF_INET;
539             hints.ai_socktype = SOCK_DGRAM;
540 #ifdef PF_INET6
541         } else if (!strcmp(s->proto, "udp6")) {
542             hints.ai_family = PF_INET6;
543             hints.ai_socktype = SOCK_DGRAM;
544 #endif
545         } else {
546             syslog(LOG_INFO, "invalid proto '%s', disabling %s",
547                    s->proto, s->name);
548             service_forget_exec(s);
549             return;
550         }
551 
552         if (parse_inet_listen(s->listen, &listen_addr, &port) < 0) {
553             syslog(LOG_ERR, "invalid listen '%s', disabling %s",
554                    s->listen, s->name);
555             service_forget_exec(s);
556             return;
557         }
558 
559         error = getaddrinfo(listen_addr, port, &hints, &res0);
560 
561         free(listen_addr);
562         free(port);
563 
564         if (error) {
565             syslog(LOG_INFO, "%s, disabling %s", gai_strerror(error), s->name);
566             service_forget_exec(s);
567             return;
568         }
569     }
570 
571     memcpy(&service0, s, sizeof(struct service));
572 
573     for (res = res0; res; res = res->ai_next) {
574         if (s->socket >= 0) {
575             memcpy(&service, &service0, sizeof(struct service));
576             s = &service;
577         }
578 
579         s->family = res->ai_family;
580         switch (s->family) {
581         case AF_UNIX:   s->familyname = "unix"; break;
582         case AF_INET:   s->familyname = "ipv4"; break;
583         case AF_INET6:  s->familyname = "ipv6"; break;
584         default:        s->familyname = "unknown"; break;
585         }
586 
587         if (verbose > 2) {
588             syslog(LOG_DEBUG, "activating service %s/%s",
589                 s->name, s->familyname);
590         }
591 
592         s->socket = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
593         if (s->socket < 0) {
594             int e = errno;
595             if (is_startup && config_getswitch(IMAPOPT_MASTER_BIND_ERRORS_FATAL)) {
596                 struct buf buf = BUF_INITIALIZER;
597                 buf_printf(&buf, "unable to open %s/%s socket: %s",
598                                  s->name, s->familyname, strerror(e));
599                 fatal(buf_cstring(&buf), EX_UNAVAILABLE);
600             }
601 
602             syslog(LOG_ERR, "unable to open %s/%s socket: %m",
603                 s->name, s->familyname);
604             continue;
605         }
606 
607         /* allow reuse of address */
608         r = setsockopt(s->socket, SOL_SOCKET, SO_REUSEADDR,
609                        (void *) &on, sizeof(on));
610         if (r < 0) {
611             syslog(LOG_ERR, "unable to setsocketopt(SO_REUSEADDR) service %s/%s: %m",
612                 s->name, s->familyname);
613         }
614 #if defined(IPV6_V6ONLY) && !(defined(__FreeBSD__) && __FreeBSD__ < 3)
615         if (res->ai_family == AF_INET6) {
616             r = setsockopt(s->socket, IPPROTO_IPV6, IPV6_V6ONLY,
617                            (void *) &on, sizeof(on));
618             if (r < 0) {
619                 syslog(LOG_ERR, "unable to setsocketopt(IPV6_V6ONLY) service %s/%s: %m",
620                     s->name, s->familyname);
621             }
622         }
623 #endif
624 
625         /* set IP ToS if supported */
626 #if defined(SOL_IP) && defined(IP_TOS)
627         if (s->family == AF_INET || s->family == AF_INET6) {
628             r = setsockopt(s->socket, SOL_IP, IP_TOS,
629                            (void *) &config_qosmarking,
630                            sizeof(config_qosmarking));
631             if (r < 0) {
632                 syslog(LOG_WARNING,
633                        "unable to setsocketopt(IP_TOS) service %s/%s: %m",
634                        s->name, s->familyname);
635             }
636         }
637 #endif
638 
639         oldumask = umask((mode_t) 0); /* for linux */
640         r = cyrus_cap_bind(s->socket, res->ai_addr, res->ai_addrlen);
641         umask(oldumask);
642         if (r < 0) {
643             int e = errno;
644             if (is_startup && config_getswitch(IMAPOPT_MASTER_BIND_ERRORS_FATAL)) {
645                 struct buf buf = BUF_INITIALIZER;
646                 buf_printf(&buf, "unable to bind to %s/%s socket: %s",
647                                  s->name, s->familyname, strerror(e));
648                 fatal(buf_cstring(&buf), EX_UNAVAILABLE);
649             }
650 
651             syslog(LOG_ERR, "unable to bind to %s/%s socket: %m",
652                 s->name, s->familyname);
653             xclose(s->socket);
654             continue;
655         }
656 
657         if (s->listen[0] == '/') { /* unix socket */
658             /* for DUX, where this isn't the default.
659                (harmlessly fails on some systems) */
660             chmod(s->listen, (mode_t) 0777);
661         }
662 
663         if ((!strcmp(s->proto, "tcp") || !strcmp(s->proto, "tcp4")
664              || !strcmp(s->proto, "tcp6"))
665             && listen(s->socket, listen_queue_backlog) < 0) {
666             int e = errno;
667             if (is_startup && config_getswitch(IMAPOPT_MASTER_BIND_ERRORS_FATAL)) {
668                 struct buf buf = BUF_INITIALIZER;
669                 buf_printf(&buf, "unable to listen to %s/%s socket: %s",
670                                  s->name, s->familyname, strerror(e));
671                 fatal(buf_cstring(&buf), EX_UNAVAILABLE);
672             }
673 
674             syslog(LOG_ERR, "unable to listen to %s/%s socket: %m",
675                 s->name, s->familyname);
676             xclose(s->socket);
677             continue;
678         }
679 
680         s->ready_workers = 0;
681         s->associate = nsocket;
682 
683         get_statsock(s->stat);
684 
685         if (s == &service)
686             service_add(s);
687         nsocket++;
688     }
689     if (res0) {
690         if(res0_is_local)
691             free(res0);
692         else
693             freeaddrinfo(res0);
694     }
695     if (nsocket <= 0) {
696         syslog(LOG_ERR, "unable to create %s listener socket: %m", s->name);
697         service_forget_exec(s);
698         return;
699     }
700 }
701 
/*
 * Interpret a wait status for child 'pid' and log what happened.
 *
 * 'c' is the child's table entry, or NULL if unknown; it is only used
 * to describe the process in the log message.
 *
 * Returns 0 when the child exited with status 0 or EX_TEMPFAIL,
 * 1 when it exited with any other status or died from a signal, and
 * 2 when it died from a signal and dumped core (only where WCOREDUMP
 * is available).
 */
static int decode_wait_status(struct centry *c, pid_t pid, int status)
{
    char *who = centry_describe(c, pid);
    int verdict = 0;

    if (WIFEXITED(status)) {
        switch (WEXITSTATUS(status)) {
        case 0:
            syslog(LOG_DEBUG, "%s exited normally", who);
            break;

        case EX_TEMPFAIL:
            syslog(LOG_DEBUG, "%s was killed", who);
            break;

        default:
            syslog(LOG_ERR, "%s exited, status %d",
                   who, WEXITSTATUS(status));
            verdict = 1;
            break;
        }
    }

    if (WIFSIGNALED(status)) {
        const char *signame = strsignal(WTERMSIG(status));

        if (signame == NULL)
            signame = "unknown signal";
#ifdef WCOREDUMP
        syslog(LOG_ERR, "%s signaled to death by signal %d (%s%s)",
               who, WTERMSIG(status), signame,
               WCOREDUMP(status) ? ", core dumped" : "");
        if (WCOREDUMP(status))
            verdict = 2;
        else
            verdict = 1;
#else
        syslog(LOG_ERR, "%s signaled to death by %s %d",
               who, signame, WTERMSIG(status));
        verdict = 1;
#endif
    }

    free(who);
    return verdict;
}
739 
run_startup(const char * name,const strarray_t * cmd)740 static void run_startup(const char *name, const strarray_t *cmd)
741 {
742     pid_t pid;
743     int status;
744     struct centry *c;
745     char path[PATH_MAX];
746 
747     get_executable(path, sizeof(path), cmd);
748 
749     switch (pid = fork()) {
750     case -1:
751         fatalf(1, "can't fork process to run startup: %m");
752         break;
753 
754     case 0:
755         /* Child - Release our pidfile lock. */
756         xclose(pidfd);
757 
758         set_caps(AFTER_FORK, /*is_master*/1);
759 
760         child_sighandler_setup();
761 
762         syslog(LOG_DEBUG, "about to exec %s", path);
763         execv(path, cmd->data);
764         fatalf(EX_OSERR, "can't exec %s for startup: %m", path);
765 
766     default: /* parent */
767         if (waitpid(pid, &status, 0) < 0) {
768             syslog(LOG_ERR, "waitpid(): %m");
769             return;
770         }
771         c = centry_alloc();
772         centry_set_name(c, "START", name, path);
773         if (decode_wait_status(c, pid, status))
774             fatal("can't run startup", 1);
775         centry_free(c);
776         break;
777     }
778 }
779 
/*
 * Clear 'flag' (e.g. FD_CLOEXEC) from the descriptor flags of 'fd'.
 *
 * The flags are read from and written back to the same descriptor.
 * Failure of either fcntl() call is logged at LOG_ERR and is otherwise
 * not reported to the caller.
 */
static void fcntl_unset(int fd, int flag)
{
    int fdflags = fcntl(fd, F_GETFD, 0);

    if (fdflags != -1)
        fdflags = fcntl(fd, F_SETFD, fdflags & ~flag);

    if (fdflags == -1) {
        syslog(LOG_ERR, "fcntl(): unable to unset %d: %m", flag);
    }
}
789 
service_is_fork_limited(struct service * s)790 static int service_is_fork_limited(struct service *s)
791 {
792 /* The longest period for which we will ignore the service */
793 #define FORKRATE_INTERVAL   0.4 /* seconds */
794 /* How much the forkrate estimator decays, as a proportion, per second */
795 #define FORKRATE_ALPHA          0.5     /* per second */
796     struct timeval now;
797     double interval;
798 
799     if (!s->maxforkrate)
800         return 0;
801 
802     gettimeofday(&now, 0);
803     interval = timesub(&s->last_interval_start, &now);
804     /* update our fork rate */
805     if (interval > 0.0) {
806         double f = pow(FORKRATE_ALPHA, interval);
807         s->forkrate = f * s->forkrate +
808                       (1.0-f) * (s->interval_forks/interval);
809         s->interval_forks = 0;
810         s->last_interval_start = now;
811     }
812     else if (interval < 0.0) {
813         /*
814          * NTP or similar moved the time-of-day clock backwards more
815          * than the interval we asked to be delayed for.  Given that, we
816          * have no basis for updating forkrate and must reset our rate
817          * estimating state.  Let's just hope this is a rare event.
818          */
819         s->interval_forks = 0;
820         s->last_interval_start = now;
821         syslog(LOG_WARNING, "time of day clock went backwards");
822     }
823 
824     /* If we've been busy lately, we will refuse to fork! */
825     /* (We schedule a wakeup call for sometime soon though to be
826      * sure that we don't wait to do the fork that is required forever! */
827     if ((unsigned int)s->forkrate >= s->maxforkrate) {
828         struct event *evt = (struct event *) xzmalloc(sizeof(struct event));
829 
830         evt->name = xstrdup("forkrate wakeup call");
831         evt->mark = now;
832         timeval_add_double(&evt->mark, FORKRATE_INTERVAL);
833 
834         schedule_event(evt);
835 
836         return 1;
837     }
838     return 0;
839 }
840 
spawn_service(int si)841 static void spawn_service(int si)
842 {
843     pid_t p;
844     int i;
845     char path[PATH_MAX];
846     static char name_env[100], name_env2[100], name_env3[100];
847     struct centry *c;
848     struct service *s = &Services[si];
849 
850     if (!s->name) {
851         fatal("Serious software bug found: spawn_service() called on unnamed service!",
852                 EX_SOFTWARE);
853     }
854 
855     if (service_is_fork_limited(s))
856         return;
857 
858     get_executable(path, sizeof(path), s->exec);
859 
860     switch (p = fork()) {
861     case -1:
862         syslog(LOG_ERR, "can't fork process to run service %s/%s: %m",
863             s->name, s->familyname);
864         break;
865 
866     case 0:
867         if (verbose > 2) {
868             syslog(LOG_DEBUG, "forked process to run service %s/%s",
869                 s->name, s->familyname);
870         }
871 
872         /* Child - Release our pidfile lock. */
873         xclose(pidfd);
874 
875         set_caps(AFTER_FORK, /*is_master*/1);
876 
877         child_sighandler_setup();
878 
879         if (s->listen) {
880             if (dup2(s->stat[1], STATUS_FD) < 0) {
881                 syslog(LOG_ERR, "can't duplicate status fd: %m");
882                 exit(1);
883             }
884             if (dup2(s->socket, LISTEN_FD) < 0) {
885                 syslog(LOG_ERR, "can't duplicate listener fd: %m");
886                 exit(1);
887             }
888 
889             fcntl_unset(STATUS_FD, FD_CLOEXEC);
890             fcntl_unset(LISTEN_FD, FD_CLOEXEC);
891         }
892         else {
893             snprintf(name_env3, sizeof(name_env3), "CYRUS_ISDAEMON=1");
894             putenv(name_env3);
895         }
896 #ifdef HAVE_SETRLIMIT
897         if (s->maxfds) limit_fds(s->maxfds);
898 #endif
899 
900         /* close all listeners */
901         for (i = 0; i < nservices; i++) {
902             xclose(Services[i].socket);
903             xclose(Services[i].stat[0]);
904             xclose(Services[i].stat[1]);
905         }
906 
907         syslog(LOG_DEBUG, "about to exec %s", path);
908 
909         /* add service name to environment */
910         snprintf(name_env, sizeof(name_env), "CYRUS_SERVICE=%s", s->name);
911         putenv(name_env);
912         snprintf(name_env2, sizeof(name_env2), "CYRUS_ID=%d", s->associate);
913         putenv(name_env2);
914 
915         execv(path, s->exec->data);
916         syslog(LOG_ERR, "couldn't exec %s: %m", path);
917         exit(EX_OSERR);
918 
919     default:                    /* parent */
920         s->ready_workers++;
921         s->interval_forks++;
922         s->nforks++;
923         s->nactive++;
924 
925         /* add to child table */
926         c = centry_alloc();
927         centry_set_name(c, s->listen ? "SERVICE" : "DAEMON", s->name, path);
928         c->si = si;
929         centry_set_state(c, SERVICE_STATE_READY);
930         centry_add(c, p);
931         break;
932     }
933 
934 }
935 
schedule_event(struct event * a)936 static void schedule_event(struct event *a)
937 {
938     struct event *ptr;
939 
940     if (! a->name)
941         fatal("Serious software bug found: schedule_event() called on unnamed event!",
942                 EX_SOFTWARE);
943 
944     if (!schedule || timesub(&schedule->mark, &a->mark) < 0.0) {
945         a->next = schedule;
946         schedule = a;
947 
948         return;
949     }
950     for (ptr = schedule;
951          ptr->next && timesub(&a->mark, &ptr->next->mark) <= 0.0;
952          ptr = ptr->next) ;
953 
954     /* insert a */
955     a->next = ptr->next;
956     ptr->next = a;
957 }
958 
spawn_schedule(struct timeval now)959 static void spawn_schedule(struct timeval now)
960 {
961     struct event *a, *b;
962     int i;
963     char path[PATH_MAX];
964     pid_t p;
965     struct centry *c;
966 
967     a = NULL;
968     /* update schedule accordingly */
969     while (schedule && timesub(&now, &schedule->mark) <= 0.0) {
970         /* delete from schedule, insert into a */
971         struct event *ptr = schedule;
972 
973         /* delete */
974         schedule = schedule->next;
975 
976         /* insert */
977         ptr->next = a;
978         a = ptr;
979     }
980 
981     /* run all events */
982     while (a && a != schedule) {
983         /* if a->exec is NULL, we just used the event to wake up,
984          * so we actually don't need to exec anything at the moment */
985         if(a->exec) {
986             get_executable(path, sizeof(path), a->exec);
987             switch (p = fork()) {
988             case -1:
989                 syslog(LOG_CRIT,
990                        "can't fork process to run event %s", a->name);
991                 break;
992 
993             case 0:
994                 /* Child - Release our pidfile lock. */
995                 xclose(pidfd);
996 
997                 set_caps(AFTER_FORK, /*is_master*/1);
998 
999                 /* close all listeners */
1000                 for (i = 0; i < nservices; i++) {
1001                     xclose(Services[i].socket);
1002                     xclose(Services[i].stat[0]);
1003                     xclose(Services[i].stat[1]);
1004                 }
1005 
1006                 syslog(LOG_DEBUG, "about to exec %s", path);
1007                 execv(path, a->exec->data);
1008                 syslog(LOG_ERR, "can't exec %s on schedule: %m", path);
1009                 exit(EX_OSERR);
1010                 break;
1011 
1012             default:
1013                 /* we don't wait for it to complete */
1014 
1015                 /* add to child table */
1016                 c = centry_alloc();
1017                 centry_set_name(c, "EVENT", a->name, path);
1018                 centry_set_state(c, SERVICE_STATE_READY);
1019                 centry_add(c, p);
1020                 break;
1021             }
1022         } /* a->exec */
1023 
1024         /* reschedule as needed */
1025         b = a->next;
1026         if (a->period) {
1027             if(a->periodic) {
1028                 a->mark = now;
1029                 a->mark.tv_sec += a->period;
1030             } else {
1031                 struct tm *tm;
1032                 int delta;
1033                 /* Daily Event */
1034                 while (timesub(&now, &a->mark) <= 0.0)
1035                     a->mark.tv_sec += a->period;
1036                 /* check for daylight savings fuzz... */
1037                 tm = localtime(&a->mark.tv_sec);
1038                 if (tm->tm_hour != a->hour || tm->tm_min != a->min) {
1039                     /* calculate the same time on the new day */
1040                     tm->tm_hour = a->hour;
1041                     tm->tm_min = a->min;
1042                     delta = mktime(tm) - a->mark.tv_sec;
1043                     /* bring it within half a period either way */
1044                     while (delta > (a->period/2)) delta -= a->period;
1045                     while (delta < -(a->period/2)) delta += a->period;
1046                     /* update the time */
1047                     a->mark.tv_sec += delta;
1048                     /* and let us know about the change */
1049                     syslog(LOG_NOTICE, "timezone shift for %s - altering schedule by %d seconds", a->name, delta);
1050                 }
1051             }
1052             /* reschedule a */
1053             schedule_event(a);
1054         } else {
1055             event_free(a);
1056         }
1057         /* examine next event */
1058         a = b;
1059     }
1060 }
1061 
reap_child(void)1062 static void reap_child(void)
1063 {
1064     int status;
1065     pid_t pid;
1066     struct centry *c;
1067     struct service *s;
1068     int failed;
1069 
1070     while ((pid = waitpid((pid_t) -1, &status, WNOHANG)) > 0) {
1071 
1072         /* account for the child */
1073         c = centry_find(pid);
1074 
1075         failed = decode_wait_status(c, pid, status);
1076 
1077         if (c) {
1078             s = ((c->si) != SERVICE_NONE) ? &Services[c->si] : NULL;
1079 
1080             /* paranoia */
1081             switch (c->service_state) {
1082             case SERVICE_STATE_READY:
1083             case SERVICE_STATE_BUSY:
1084             case SERVICE_STATE_UNKNOWN:
1085             case SERVICE_STATE_DEAD:
1086                 break;
1087             default:
1088                 syslog(LOG_CRIT,
1089                        "service %s/%s pid %d in ILLEGAL STATE: exited. Serious "
1090                        "software bug or memory corruption detected!",
1091                        s ? SERVICEPARAM(s->name) : "unknown",
1092                        s ? SERVICEPARAM(s->familyname) : "unknown", pid);
1093                 centry_set_state(c, SERVICE_STATE_UNKNOWN);
1094             }
1095             if (s) {
1096                 /* update counters for known services */
1097                 switch (c->service_state) {
1098                 case SERVICE_STATE_READY:
1099                     s->nactive--;
1100                     s->ready_workers--;
1101                     if (!in_shutdown && failed) {
1102                         time_t now = time(NULL);
1103 
1104                         syslog(LOG_WARNING,
1105                                "service %s/%s pid %d in READY state: "
1106                                "terminated abnormally",
1107                                SERVICEPARAM(s->name),
1108                                SERVICEPARAM(s->familyname), pid);
1109                         if (now - s->lastreadyfail > MAX_READY_FAIL_INTERVAL) {
1110                             s->nreadyfails = 0;
1111                         }
1112                         s->lastreadyfail = now;
1113                         if (++s->nreadyfails >= MAX_READY_FAILS && s->exec) {
1114                             syslog(LOG_ERR, "too many failures for "
1115                                    "service %s/%s, disabling until next SIGHUP",
1116                                    SERVICEPARAM(s->name),
1117                                    SERVICEPARAM(s->familyname));
1118                             service_forget_exec(s);
1119                             xclose(s->socket);
1120                         }
1121                     }
1122                     break;
1123 
1124                 case SERVICE_STATE_DEAD:
1125                     /* uh? either we got duplicate signals, or we are now MT */
1126                     syslog(LOG_WARNING,
1127                            "service %s/%s pid %d in DEAD state: "
1128                            "receiving duplicate signals",
1129                            SERVICEPARAM(s->name),
1130                            SERVICEPARAM(s->familyname), pid);
1131                     break;
1132 
1133                 case SERVICE_STATE_BUSY:
1134                     s->nactive--;
1135                     if (!in_shutdown && failed) {
1136                         syslog(LOG_DEBUG,
1137                                "service %s/%s pid %d in BUSY state: "
1138                                "terminated abnormally",
1139                                SERVICEPARAM(s->name),
1140                                SERVICEPARAM(s->familyname), pid);
1141                     }
1142                     break;
1143 
1144                 case SERVICE_STATE_UNKNOWN:
1145                     s->nactive--;
1146                     syslog(LOG_WARNING,
1147                            "service %s/%s pid %d in UNKNOWN state: exited",
1148                            SERVICEPARAM(s->name),
1149                            SERVICEPARAM(s->familyname), pid);
1150                     break;
1151                 default:
1152                     /* Shouldn't get here */
1153                     break;
1154                 }
1155             } else {
1156                 /* children from spawn_schedule (events) or
1157                  * children of services removed by reread_conf() */
1158                 if (c->service_state != SERVICE_STATE_READY) {
1159                     syslog(LOG_WARNING,
1160                            "unknown service pid %d in state %d: exited "
1161                            "(maybe using a service as an event, "
1162                            "or a service was removed by SIGHUP?)",
1163                            pid, c->service_state);
1164                 }
1165             }
1166             centry_set_state(c, SERVICE_STATE_DEAD);
1167         } else {
1168             /* Are we multithreaded now? we don't know this child */
1169             syslog(LOG_ERR,
1170                    "received SIGCHLD from unknown child pid %d, ignoring",
1171                    pid);
1172             /* FIXME: is this something we should take lightly? */
1173         }
1174         if (verbose && c && (c->si != SERVICE_NONE))
1175             syslog(LOG_DEBUG, "service %s/%s now has %d ready workers",
1176                     SERVICEPARAM(Services[c->si].name),
1177                     SERVICEPARAM(Services[c->si].familyname),
1178                     Services[c->si].ready_workers);
1179     }
1180 }
1181 
init_janitor(struct timeval now)1182 static void init_janitor(struct timeval now)
1183 {
1184     struct event *evt = (struct event *) xzmalloc(sizeof(struct event));
1185 
1186     janitor_mark = now;
1187     janitor_position = 0;
1188 
1189     evt->name = xstrdup("janitor periodic wakeup call");
1190     evt->period = 10;
1191     evt->periodic = 1;
1192     evt->mark = janitor_mark;
1193     schedule_event(evt);
1194 }
1195 
child_janitor(struct timeval now)1196 static void child_janitor(struct timeval now)
1197 {
1198     int i;
1199     struct centry **p;
1200     struct centry *c;
1201 
1202     /* Estimate the number of entries to clean up in this sweep */
1203     if (now.tv_sec > janitor_mark.tv_sec + 1) {
1204         /* overflow protection */
1205         i = child_table_size;
1206     } else {
1207         double n;
1208 
1209         n = child_table_size * janitor_frequency * timesub(&janitor_mark, &now);
1210         if (n < child_table_size) {
1211             i = n;
1212         } else {
1213             i = child_table_size;
1214         }
1215     }
1216 
1217     while (i-- > 0) {
1218         p = &ctable[janitor_position++];
1219         janitor_position = janitor_position % child_table_size;
1220         while (*p) {
1221             c = *p;
1222             if (c->service_state == SERVICE_STATE_DEAD) {
1223                 if (c->janitor_deadline < now.tv_sec) {
1224                     *p = c->next;
1225                     centry_free(c);
1226                 } else {
1227                     p = &((*p)->next);
1228                 }
1229             } else {
1230                 time_t delay = (c->sighuptime != (time_t)-1) ?
1231                     time(NULL) - c->sighuptime : 0;
1232 
1233                 if (delay >= 30) {
1234                     /* client not yet logged out ? */
1235                     struct service *s = ((c->si) != SERVICE_NONE) ?
1236                         &Services[c->si] : NULL;
1237 
1238                     syslog(LOG_INFO, "service %s/%s pid %d in state %d has not "
1239                         "yet been recycled since SIGHUP was sent (%ds ago)",
1240                         s ? SERVICEPARAM(s->name) : "unknown",
1241                         s ? SERVICEPARAM(s->familyname) : "unknown",
1242                         c->pid, c->service_state, (int)delay);
1243 
1244                     /* no need to log it more than once */
1245                     c->sighuptime = (time_t)-1;
1246                 }
1247                 p = &((*p)->next);
1248             }
1249         }
1250     }
1251 }
1252 
/* Allow a clean shutdown on SIGQUIT, SIGTERM or SIGINT.
 * gotsigquit is the flag raised by sigquit_handler(); sighandler_setup()
 * installs that one handler for all three signals. */
static volatile sig_atomic_t gotsigquit = 0;

/* Signal handler: only records that a shutdown signal arrived.
 * The signal number itself is not used. */
static void sigquit_handler(int sig __attribute__((unused)))
{
    gotsigquit = 1;
}
1260 
begin_shutdown(void)1261 static void begin_shutdown(void)
1262 {
1263     /* Set a flag so main loop knows to shut down when
1264        all children have exited.  Note, we will be called
1265        twice as we send SIGTERM to our own process group. */
1266     if (in_shutdown)
1267         return;
1268     in_shutdown = 1;
1269     syslog(LOG_INFO, "attempting clean shutdown on signal");
1270 
1271     /* send our process group a SIGTERM */
1272     if (kill(0, SIGTERM) < 0) {
1273         syslog(LOG_ERR, "begin_shutdown: kill(0, SIGTERM): %m");
1274     }
1275 }
1276 
/* Raised by sigchld_handler() when SIGCHLD is delivered. */
static volatile sig_atomic_t gotsigchld = 0;

/* Signal handler installed for SIGCHLD by sighandler_setup(): only
 * records that the signal arrived.  The signal number is not used. */
static void sigchld_handler(int sig __attribute__((unused)))
{
    gotsigchld = 1;
}
1283 
/* Raised by sighup_handler() when SIGHUP is delivered.
 * Declared volatile sig_atomic_t, like gotsigquit and gotsigchld, because
 * that is the object type a signal handler may portably write to. */
static volatile sig_atomic_t gotsighup = 0;

/* Signal handler installed for SIGHUP by sighandler_setup(): only
 * records that the signal arrived.  The signal number is not used. */
static void sighup_handler(int sig __attribute__((unused)))
{
    gotsighup = 1;
}
1290 
/* Signal handler installed for SIGALRM by sighandler_setup().
 * It intentionally does no work at all. */
static void sigalrm_handler(int sig __attribute__((unused)))
{
    /* nothing to do */
}
1295 
sighandler_setup(void)1296 static void sighandler_setup(void)
1297 {
1298     struct sigaction action;
1299     sigset_t siglist;
1300 
1301     memset(&siglist, 0, sizeof(siglist));
1302     sigemptyset(&siglist);
1303     sigaddset(&siglist, SIGHUP);
1304     sigaddset(&siglist, SIGALRM);
1305     sigaddset(&siglist, SIGQUIT);
1306     sigaddset(&siglist, SIGTERM);
1307     sigaddset(&siglist, SIGINT);
1308     sigaddset(&siglist, SIGCHLD);
1309     sigprocmask(SIG_UNBLOCK, &siglist, NULL);
1310 
1311     memset(&action, 0, sizeof(action));
1312     sigemptyset(&action.sa_mask);
1313 
1314     action.sa_handler = sighup_handler;
1315 #ifdef SA_RESTART
1316     action.sa_flags |= SA_RESTART;
1317 #endif
1318     if (sigaction(SIGHUP, &action, NULL) < 0)
1319         fatalf(1, "unable to install signal handler for SIGHUP: %m");
1320 
1321     action.sa_handler = sigalrm_handler;
1322     if (sigaction(SIGALRM, &action, NULL) < 0)
1323         fatalf(1, "unable to install signal handler for SIGALRM: %m");
1324 
1325     /* Allow a clean shutdown on any of SIGQUIT, SIGINT or SIGTERM */
1326     action.sa_handler = sigquit_handler;
1327     if (sigaction(SIGQUIT, &action, NULL) < 0)
1328         fatalf(1, "unable to install signal handler for SIGQUIT: %m");
1329     if (sigaction(SIGTERM, &action, NULL) < 0)
1330         fatalf(1, "unable to install signal handler for SIGTERM: %m");
1331     if (sigaction(SIGINT, &action, NULL) < 0)
1332         fatalf(1, "unable to install signal handler for SIGINT: %m");
1333 
1334     action.sa_flags |= SA_NOCLDSTOP;
1335     action.sa_handler = sigchld_handler;
1336     if (sigaction(SIGCHLD, &action, NULL) < 0)
1337         fatalf(1, "unable to install signal handler for SIGCHLD: %m");
1338 
1339 #if HAVE_PSELECT
1340     /* block SIGCHLD, and set up pselect_sigmask so SIGCHLD
1341      * will be unblocked again inside pselect().  Ditto SIGQUIT.  */
1342     sigemptyset(&siglist);
1343     sigaddset(&siglist, SIGCHLD);
1344     sigaddset(&siglist, SIGQUIT);
1345     sigaddset(&siglist, SIGINT);
1346     sigaddset(&siglist, SIGTERM);
1347     sigprocmask(SIG_BLOCK, &siglist, &pselect_sigmask);
1348 #endif
1349 }
1350 
/*
 * Undo the master's signal setup in a freshly forked child.
 *
 * Only does something when HAVE_PSELECT is set: SIGQUIT, SIGINT and
 * SIGTERM go back to their default action, and the signal mask saved in
 * pselect_sigmask is reinstated.  A sigaction() failure is logged and the
 * child exits with EX_TEMPFAIL.
 */
static void child_sighandler_setup(void)
{
#if HAVE_PSELECT
    /*
     * execv() would reset these handlers anyway, but a signal delivered in
     * the window between fork() and execv() would still be caught by the
     * master's handler, which merely sets a flag that nobody in the child
     * ever looks at.  Hence the explicit reset to the default action.
     */
    static const int reset[] = { SIGQUIT, SIGINT, SIGTERM };
    struct sigaction dfl;
    unsigned k;

    memset(&dfl, 0, sizeof(dfl));
    sigemptyset(&dfl.sa_mask);
    dfl.sa_handler = SIG_DFL;

    for (k = 0; k < sizeof(reset) / sizeof(reset[0]); k++) {
        if (sigaction(reset[k], &dfl, NULL) >= 0)
            continue;

        /* nothing between the failed call and syslog() touches errno,
         * so %m still describes the sigaction() failure */
        switch (reset[k]) {
        case SIGQUIT:
            syslog(LOG_ERR, "unable to remove signal handler for SIGQUIT: %m");
            break;
        case SIGINT:
            syslog(LOG_ERR, "unable to remove signal handler for SIGINT: %m");
            break;
        case SIGTERM:
            syslog(LOG_ERR, "unable to remove signal handler for SIGTERM: %m");
            break;
        default:
            break;
        }
        exit(EX_TEMPFAIL);
    }

    /* Unblock SIGCHLD et al in the child */
    sigprocmask(SIG_SETMASK, &pselect_sigmask, NULL);
#endif
}
1383 
1384 /*
1385  * Receives a message from a service.
1386  *
1387  * Returns zero if all goes well
1388  * 1 if no msg available
1389  * 2 if bad message received (incorrectly sized)
1390  * -1 on error (errno set)
1391  *
1392  * TODO: should use retry_read() which has almost the
1393  * exact same semantics apart from the return value.
1394  */
read_msg(int fd,struct notify_message * msg)1395 static int read_msg(int fd, struct notify_message *msg)
1396 {
1397     ssize_t r = 0;
1398     size_t off = 0;
1399     int s = sizeof(struct notify_message);
1400 
1401     while (s > 0) {
1402         do
1403             r = read(fd, ((char *)msg) + off, s);
1404         while ((r == -1) && (errno == EINTR));
1405         if (r <= 0) break;
1406         s -= r;
1407         off += r;
1408     }
1409     if ( ((r == 0) && (off == 0)) ||
1410          ((r == -1) && (errno == EAGAIN)) )
1411         return 1;
1412     if (r == -1) return -1;
1413     if (s != 0) return 2;
1414     return 0;
1415 }
1416 
/*
 * Apply one status message sent by a service child to the master's
 * bookkeeping.
 *
 * si  - index into Services[] of the service the message arrived for;
 *       it is used unchecked, so it must be valid
 * msg - the message; msg->service_pid identifies the sending child and
 *       msg->message is one of the MASTER_SERVICE_* codes
 *
 * The child's table entry is looked up (and re-created in DEAD state if
 * it is no longer present), sanity-checked, and then moved through the
 * READY/BUSY state machine.  The service's ready_workers and nconnections
 * counters are adjusted along the way.
 */
static void process_msg(int si, struct notify_message *msg)
{
    struct centry *c;
    /* si must NOT point to an invalid service */
    struct service *s = &Services[si];

    c = centry_find(msg->service_pid);

    /* Did we find it? */
    if (!c) {
        /* If we don't know about the child, that means it has expired from
         * the child list, due to large message delivery delays.  This is
         * indeed possible, although it is rare (Debian bug report).
         *
         * Note that this analysis depends on master's single-threaded
         * nature */
        syslog(LOG_WARNING,
                "service %s/%s pid %d: receiving messages from long dead children",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), msg->service_pid);
        /* re-add child to list */
        c = centry_alloc();
        centry_set_name(c, "ZOMBIE", NULL, NULL);
        c->si = si;
        centry_set_state(c, SERVICE_STATE_DEAD);
        centry_add(c, msg->service_pid);
    }

    /* paranoia: the entry is recorded under a different service than the
     * one the message arrived for; log it and trust the message */
    if (si != c->si) {
        syslog(LOG_ERR,
               "service %s/%s pid %d: changing from service %s/%s due to received message",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid,
               ((c->si != SERVICE_NONE) ? SERVICEPARAM(Services[c->si].name) : "unknown"),
               ((c->si != SERVICE_NONE) ? SERVICEPARAM(Services[c->si].familyname) : "unknown"));
        c->si = si;
    }
    /* validate the current state before running the state machine; any
     * value outside the four known states is forced to UNKNOWN */
    switch (c->service_state) {
    case SERVICE_STATE_UNKNOWN:
        syslog(LOG_WARNING,
               "service %s/%s pid %d in UNKNOWN state: processing message 0x%x",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid, msg->message);
        break;
    case SERVICE_STATE_READY:
    case SERVICE_STATE_BUSY:
    case SERVICE_STATE_DEAD:
        break;
    default:
        syslog(LOG_CRIT,
               "service %s/%s pid %d in ILLEGAL state: detected. Serious software bug or memory corruption uncloaked while processing message 0x%x from child!",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid, msg->message);
        centry_set_state(c, SERVICE_STATE_UNKNOWN);
        break;
    }

    /* process message, according to state machine */
    switch (msg->message) {
    case MASTER_SERVICE_AVAILABLE:
        /* child reports it can take work: the target state is READY */
        switch (c->service_state) {
        case SERVICE_STATE_READY:
            /* duplicate message? */
            syslog(LOG_WARNING,
                   "service %s/%s pid %d in READY state: sent available message but it is already ready",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_UNKNOWN:
            /* since state is unknown, error in non-DoS way, i.e.
             * we don't increment ready_workers */
            syslog(LOG_DEBUG,
                   "service %s/%s pid %d in UNKNOWN state: now available and in READY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            centry_set_state(c, SERVICE_STATE_READY);
            break;

        case SERVICE_STATE_BUSY:
            /* the normal transition: BUSY -> READY, one more ready worker */
            if (verbose)
                syslog(LOG_DEBUG,
                       "service %s/%s pid %d in BUSY state: now available and in READY state",
                       SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            centry_set_state(c, SERVICE_STATE_READY);
            s->ready_workers++;
            break;

        case SERVICE_STATE_DEAD:
            /* echoes from the past... just ignore */
            break;

        default:
            /* Shouldn't get here */
            break;
        }
        break;

    case MASTER_SERVICE_UNAVAILABLE:
        /* child reports it cannot take work: the target state is BUSY */
        switch (c->service_state) {
        case SERVICE_STATE_BUSY:
            /* duplicate message? */
            syslog(LOG_WARNING,
                   "service %s/%s pid %d in BUSY state: sent unavailable message but it is already busy",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_UNKNOWN:
            /* ready_workers is left alone here, as in the AVAILABLE case */
            syslog(LOG_DEBUG,
                   "service %s/%s pid %d in UNKNOWN state: now unavailable and in BUSY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            centry_set_state(c, SERVICE_STATE_BUSY);
            break;

        case SERVICE_STATE_READY:
            /* the normal transition: READY -> BUSY, one fewer ready worker */
            if (verbose)
                syslog(LOG_DEBUG,
                       "service %s/%s pid %d in READY state: now unavailable and in BUSY state",
                       SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            centry_set_state(c, SERVICE_STATE_BUSY);
            s->ready_workers--;
            break;

        case SERVICE_STATE_DEAD:
            /* echoes from the past... just ignore */
            break;

        default:
            /* Shouldn't get here */
            break;
        }
        break;

    case MASTER_SERVICE_CONNECTION:
        /* child reports a new connection; every known state counts it
         * in nconnections */
        switch (c->service_state) {
        case SERVICE_STATE_BUSY:
            s->nconnections++;
            if (verbose)
                syslog(LOG_DEBUG,
                       "service %s/%s pid %d in BUSY state: now serving connection",
                       SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_UNKNOWN:
            s->nconnections++;
            centry_set_state(c, SERVICE_STATE_BUSY);
            syslog(LOG_DEBUG,
                   "service %s/%s pid %d in UNKNOWN state: now in BUSY state and serving connection",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_READY:
            syslog(LOG_ERR,
                   "service %s/%s pid %d in READY state: reported new connection, forced to BUSY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            /* be resilient on face of a bogon source, so lets err to the side
             * of non-denial-of-service */
            centry_set_state(c, SERVICE_STATE_BUSY);
            s->nconnections++;
            s->ready_workers--;
            break;

        case SERVICE_STATE_DEAD:
            /* echoes from the past... do the accounting */
            s->nconnections++;
            break;

        default:
            /* Shouldn't get here */
            break;
        }
        break;

    case MASTER_SERVICE_CONNECTION_MULTI:
        /* a connection reported by a child that is expected to stay in
         * READY state while serving it */
        switch (c->service_state) {
        case SERVICE_STATE_READY:
            s->nconnections++;
            if (verbose)
                syslog(LOG_DEBUG,
                       "service %s/%s pid %d in READY state: serving one more multi-threaded connection",
                       SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_BUSY:
            syslog(LOG_ERR,
                   "service %s/%s pid %d in BUSY state: serving one more multi-threaded connection, forced to READY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            /* be resilient on face of a bogon source, so lets err to the side
             * of non-denial-of-service */
            centry_set_state(c, SERVICE_STATE_READY);
            s->nconnections++;
            s->ready_workers++;
            break;

        case SERVICE_STATE_UNKNOWN:
            s->nconnections++;
            centry_set_state(c, SERVICE_STATE_READY);
            syslog(LOG_ERR,
                   "service %s/%s pid %d in UNKNOWN state: serving one more multi-threaded connection, forced to READY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_DEAD:
            /* echoes from the past... do the accounting */
            s->nconnections++;
            break;

        default:
            /* Shouldn't get here */
            break;
        }
        break;

    default:
        /* not one of the four message codes handled above */
        syslog(LOG_CRIT, "service %s/%s pid %d: Software bug: unrecognized message 0x%x",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid, msg->message);
        break;
    }

    if (verbose)
        syslog(LOG_DEBUG, "service %s/%s now has %d ready workers",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), s->ready_workers);
}
1635 
add_start(const char * name,struct entry * e,void * rock)1636 static void add_start(const char *name, struct entry *e,
1637                       void *rock __attribute__((unused)))
1638 {
1639     const char *cmd = masterconf_getstring(e, "cmd", "");
1640     strarray_t *tok;
1641 
1642     if (!strcmp(cmd,""))
1643         fatalf(EX_CONFIG, "unable to find command for %s", name);
1644 
1645     tok = strarray_split(cmd, NULL, 0);
1646     run_startup(name, tok);
1647     strarray_free(tok);
1648 }
1649 
add_daemon(const char * name,struct entry * e,void * rock)1650 static void add_daemon(const char *name, struct entry *e, void *rock)
1651 {
1652     int ignore_err = rock ? 1 : 0;
1653     char *cmd = xstrdup(masterconf_getstring(e, "cmd", ""));
1654     rlim_t maxfds = (rlim_t) masterconf_getint(e, "maxfds", 0);
1655     int maxforkrate = masterconf_getint(e, "maxforkrate", 0);
1656     int reconfig = 0;
1657     int i;
1658 
1659     if (maxforkrate == 0) maxforkrate = 10; /* reasonable safety */
1660 
1661     if (!strcmp(cmd, "")) {
1662         char buf[256];
1663         snprintf(buf, sizeof(buf),
1664                  "unable to find command or port for service '%s'", name);
1665 
1666         if (ignore_err) {
1667             syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1668             goto done;
1669         }
1670 
1671         fatal(buf, EX_CONFIG);
1672     }
1673 
1674     /* see if we have an existing entry that can be reused */
1675     for (i = 0; i < nservices; i++) {
1676         /* skip non-primary instances */
1677         if (Services[i].associate > 0)
1678             continue;
1679 
1680         if (!strcmpsafe(Services[i].name, name) && Services[i].exec) {
1681             /* we have duplicate service names in the config file */
1682             char buf[256];
1683             snprintf(buf, sizeof(buf), "multiple entries for service '%s'", name);
1684 
1685             if (ignore_err) {
1686                 syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1687                 goto done;
1688             }
1689 
1690             fatal(buf, EX_CONFIG);
1691         }
1692 
1693         /* must have empty/same service name, listen and proto */
1694         if (!Services[i].name || !strcmp(Services[i].name, name))
1695             break;
1696     }
1697 
1698     if (i == nservices) {
1699         /* we don't have an existing one, so create a new service */
1700         struct service *s = service_add(NULL);
1701         gettimeofday(&s->last_interval_start, 0);
1702     }
1703     else reconfig = 1;
1704 
1705     if (!Services[i].name) Services[i].name = xstrdup(name);
1706 
1707     strarray_free(Services[i].exec);
1708     Services[i].exec = strarray_split(cmd, NULL, 0);
1709 
1710     /* is this daemon actually there? */
1711     if (!verify_service_file(Services[i].exec)) {
1712         fatalf(EX_CONFIG,
1713                  "cannot find executable for daemon '%s'", name);
1714         /* if it is not, we're misconfigured, die. */
1715     }
1716 
1717     Services[i].maxforkrate = maxforkrate;
1718     Services[i].maxfds = maxfds;
1719     Services[i].babysit = 1;
1720     Services[i].max_workers = 1;
1721     Services[i].desired_workers = 1;
1722     Services[i].familyname = "daemon";
1723 
1724     if (verbose > 2)
1725         syslog(LOG_DEBUG, "%s: daemon '%s' (%s, %d)",
1726                reconfig ? "reconfig" : "add",
1727                Services[i].name, cmd,
1728                (int) Services[i].maxfds);
1729 
1730 done:
1731     free(cmd);
1732     return;
1733 }
1734 
add_service(const char * name,struct entry * e,void * rock)1735 static void add_service(const char *name, struct entry *e, void *rock)
1736 {
1737     int ignore_err = rock ? 1 : 0;
1738     char *cmd = xstrdup(masterconf_getstring(e, "cmd", ""));
1739     int prefork = masterconf_getint(e, "prefork", 0);
1740     int babysit = masterconf_getswitch(e, "babysit", 0);
1741     int maxforkrate = masterconf_getint(e, "maxforkrate", 0);
1742     char *listen = xstrdup(masterconf_getstring(e, "listen", ""));
1743     char *proto = xstrdup(masterconf_getstring(e, "proto", "tcp"));
1744     char *max = xstrdup(masterconf_getstring(e, "maxchild", "-1"));
1745     rlim_t maxfds = (rlim_t) masterconf_getint(e, "maxfds", 0);
1746     int reconfig = 0;
1747     int i, j;
1748 
1749     if(babysit && prefork == 0) prefork = 1;
1750     if(babysit && maxforkrate == 0) maxforkrate = 10; /* reasonable safety */
1751 
1752     if (!strcmp(cmd,"") || !strcmp(listen,"")) {
1753         char buf[256];
1754         snprintf(buf, sizeof(buf),
1755                  "unable to find command or port for service '%s'", name);
1756 
1757         if (ignore_err) {
1758             syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1759             goto done;
1760         }
1761 
1762         fatal(buf, EX_CONFIG);
1763     }
1764 
1765     /* see if we have an existing entry that can be reused */
1766     for (i = 0; i < nservices; i++) {
1767         /* skip non-primary instances */
1768         if (Services[i].associate > 0)
1769             continue;
1770 
1771         if (!strcmpsafe(Services[i].name, name) && Services[i].exec) {
1772             /* we have duplicate service names in the config file */
1773             char buf[256];
1774             snprintf(buf, sizeof(buf), "multiple entries for service '%s'", name);
1775 
1776             if (ignore_err) {
1777                 syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1778                 goto done;
1779             }
1780 
1781             fatal(buf, EX_CONFIG);
1782         }
1783 
1784         /* must have empty/same service name, listen and proto */
1785         if ((!Services[i].name || !strcmp(Services[i].name, name)) &&
1786             (!Services[i].listen || !strcmp(Services[i].listen, listen)) &&
1787             (!Services[i].proto || !strcmp(Services[i].proto, proto)))
1788             break;
1789     }
1790 
1791     if (i == nservices) {
1792         /* either we don't have an existing entry or we are changing
1793          * the port parameters, so create a new service
1794          */
1795         struct service *s = service_add(NULL);
1796         gettimeofday(&s->last_interval_start, 0);
1797     }
1798     else if (Services[i].listen) reconfig = 1;
1799 
1800     if (!Services[i].name) Services[i].name = xstrdup(name);
1801     if (Services[i].listen) free(Services[i].listen);
1802     Services[i].listen = listen;
1803     listen = NULL; /* avoid freeing it */
1804     if (Services[i].proto) free(Services[i].proto);
1805     Services[i].proto = proto;
1806     proto = NULL; /* avoid freeing it */
1807 
1808     strarray_free(Services[i].exec);
1809     Services[i].exec = strarray_split(cmd, NULL, 0);
1810 
1811     /* is this service actually there? */
1812     if (!verify_service_file(Services[i].exec)) {
1813         fatalf(EX_CONFIG,
1814                  "cannot find executable for service '%s'", name);
1815         /* if it is not, we're misconfigured, die. */
1816     }
1817 
1818     Services[i].maxforkrate = maxforkrate;
1819     Services[i].maxfds = maxfds;
1820 
1821     if (!strcmp(Services[i].proto, "tcp") ||
1822         !strcmp(Services[i].proto, "tcp4") ||
1823         !strcmp(Services[i].proto, "tcp6")) {
1824         Services[i].desired_workers = prefork;
1825         Services[i].babysit = babysit;
1826         Services[i].max_workers = atoi(max);
1827         if (Services[i].max_workers < 0) {
1828             Services[i].max_workers = INT_MAX;
1829         }
1830     } else {
1831         /* udp */
1832         if (prefork > 1) prefork = 1;
1833         Services[i].desired_workers = prefork;
1834         Services[i].max_workers = 1;
1835     }
1836 
1837     if (reconfig) {
1838         /* reconfiguring an existing service, update any other instances */
1839         for (j = 0; j < nservices; j++) {
1840             if (Services[j].associate > 0 && Services[j].listen &&
1841                 Services[j].name && !strcmp(Services[j].name, name)) {
1842                 Services[j].maxforkrate = Services[i].maxforkrate;
1843                 Services[j].exec = Services[i].exec;
1844                 Services[j].desired_workers = Services[i].desired_workers;
1845                 Services[j].babysit = Services[i].babysit;
1846                 Services[j].max_workers = Services[i].max_workers;
1847             }
1848         }
1849     }
1850 
1851     if (verbose > 2)
1852         syslog(LOG_DEBUG, "%s: service '%s' (%s, %s:%s, %d, %d, %d)",
1853                reconfig ? "reconfig" : "add",
1854                Services[i].name, cmd,
1855                Services[i].proto, Services[i].listen,
1856                Services[i].desired_workers,
1857                Services[i].max_workers,
1858                (int) Services[i].maxfds);
1859 
1860 done:
1861     free(cmd);
1862     free(listen);
1863     free(proto);
1864     free(max);
1865     return;
1866 }
1867 
add_event(const char * name,struct entry * e,void * rock)1868 static void add_event(const char *name, struct entry *e, void *rock)
1869 {
1870     int ignore_err = rock ? 1 : 0;
1871     /* Note: masterconf_getstring() shares a static buffer with
1872      * masterconf_getint() so we *must* strdup here */
1873     char *cmd = xstrdup(masterconf_getstring(e, "cmd", ""));
1874     int period = 60 * masterconf_getint(e, "period", 0);
1875     int at = masterconf_getint(e, "at", -1), hour, min;
1876     struct timeval now;
1877     struct event *evt;
1878 
1879     gettimeofday(&now, 0);
1880 
1881     if (!strcmp(cmd,"")) {
1882         char buf[256];
1883         snprintf(buf, sizeof(buf),
1884                  "unable to find command or port for event '%s'", name);
1885 
1886         if (ignore_err) {
1887             syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1888             free(cmd);
1889             return;
1890         }
1891 
1892         fatal(buf, EX_CONFIG);
1893     }
1894 
1895     evt = (struct event *) xzmalloc(sizeof(struct event));
1896     evt->name = xstrdup(name);
1897 
1898     if (at >= 0 && ((hour = at / 100) <= 23) && ((min = at % 100) <= 59)) {
1899         struct tm *tm = localtime(&now.tv_sec);
1900 
1901         period = 86400; /* 24 hours */
1902         evt->periodic = 0;
1903         evt->hour = hour;
1904         evt->min = min;
1905         tm->tm_hour = hour;
1906         tm->tm_min = min;
1907         tm->tm_sec = 0;
1908         evt->mark.tv_sec = mktime(tm);
1909         evt->mark.tv_usec = 0;
1910         if (timesub(&now, &evt->mark) < 0.0) {
1911             /* already missed it, so schedule for next day */
1912             evt->mark.tv_sec += period;
1913         }
1914     }
1915     else {
1916         evt->periodic = 1;
1917         evt->mark = now;
1918     }
1919     evt->period = period;
1920 
1921     evt->exec = strarray_splitm(cmd, NULL, 0);
1922 
1923     schedule_event(evt);
1924 }
1925 
1926 #ifdef HAVE_SETRLIMIT
1927 
1928 #ifdef RLIMIT_NOFILE
1929 # define RLIMIT_NUMFDS RLIMIT_NOFILE
1930 #else
1931 # ifdef RLIMIT_OFILE
1932 #  define RLIMIT_NUMFDS RLIMIT_OFILE
1933 # endif
1934 #endif
limit_fds(rlim_t x)1935 static void limit_fds(rlim_t x)
1936 {
1937     struct rlimit rl;
1938 
1939 #ifdef HAVE_GETRLIMIT
1940     if (!getrlimit(RLIMIT_NUMFDS, &rl)) {
1941         if (x != RLIM_INFINITY && rl.rlim_max != RLIM_INFINITY && x > rl.rlim_max) {
1942             syslog(LOG_WARNING,
1943                    "limit_fds: requested %" PRIu64 ", but capped to %" PRIu64,
1944                    (uint64_t) x, (uint64_t) rl.rlim_max);
1945         }
1946         rl.rlim_cur = (x == RLIM_INFINITY || x > rl.rlim_max) ? rl.rlim_max : x;
1947     }
1948     else
1949 #endif /* HAVE_GETRLIMIT */
1950     {
1951         rl.rlim_cur = rl.rlim_max = x;
1952     }
1953 
1954     if (verbose > 1) {
1955         syslog(LOG_DEBUG, "set maximum file descriptors to " RLIM_T_FMT "/" RLIM_T_FMT,
1956                rl.rlim_cur, rl.rlim_max);
1957     }
1958 
1959     if (setrlimit(RLIMIT_NUMFDS, &rl) < 0) {
1960         syslog(LOG_ERR,
1961                "setrlimit: Unable to set file descriptors limit to " RLIM_T_FMT ": %m",
1962                rl.rlim_cur);
1963     }
1964 }
1965 #endif /* HAVE_SETRLIMIT */
1966 
1967 /* minimal-dependency prometheus text report */
init_prom_report(struct timeval now)1968 static void init_prom_report(struct timeval now)
1969 {
1970     struct buf buf = BUF_INITIALIZER;
1971     struct event *evt;
1972     const char *tmp;
1973 
1974     prom_enabled = config_getswitch(IMAPOPT_PROMETHEUS_ENABLED);
1975     prom_frequency = config_getduration(IMAPOPT_PROMETHEUS_UPDATE_FREQ, 's');
1976 
1977     if (prom_frequency < 1) prom_enabled = 0;
1978     if (!prom_enabled) return;
1979 
1980     prom_prev_report.tv_sec = now.tv_sec - prom_frequency; /* next report asap */
1981     prom_prev_report.tv_usec = 0;
1982 
1983     if ((tmp = config_getstring(IMAPOPT_PROMETHEUS_STATS_DIR))) {
1984         if (tmp[0] == '/' && tmp[1] != '\0') {
1985             buf_setcstr(&buf, tmp);
1986             if (buf.s[buf.len-1] != '/')
1987                 buf_putc(&buf, '/');
1988             buf_appendcstr(&buf, FNAME_PROM_MASTER_REPORT);
1989         }
1990     }
1991     else if ((tmp = config_getstring(IMAPOPT_CONFIGDIRECTORY))) {
1992         buf_setcstr(&buf, tmp);
1993         buf_appendcstr(&buf, FNAME_PROM_STATS_DIR);
1994         buf_putc(&buf, '/');
1995         buf_appendcstr(&buf, FNAME_PROM_MASTER_REPORT);
1996     }
1997 
1998     if (!buf_len(&buf)) {
1999         syslog(LOG_NOTICE, "couldn't find somewhere to write prometheus report to"
2000                            " - disabling master prometheus report until next reload");
2001         prom_enabled = 0;
2002         buf_free(&buf);
2003         return;
2004     }
2005 
2006     if (prom_report_fname) free(prom_report_fname);
2007     prom_report_fname = buf_release(&buf);
2008     cyrus_mkdir(prom_report_fname, 0755);
2009 
2010     evt = xzmalloc(sizeof(*evt));
2011     evt->name = xstrdup("master prometheus report periodic wakeup call");
2012     evt->period = prom_frequency;
2013     evt->periodic = 1;
2014     evt->mark = now;
2015     schedule_event(evt);
2016 
2017     syslog(LOG_DEBUG, "updating %s every %d seconds", prom_report_fname, prom_frequency);
2018 }
2019 
do_prom_report(struct timeval now)2020 static void do_prom_report(struct timeval now)
2021 {
2022     struct buf report = BUF_INITIALIZER;
2023     int fd, i, r;
2024     int64_t last_updated;
2025 
2026     if (!prom_enabled || timesub(&prom_prev_report, &now) + 0.5 < prom_frequency)
2027         return;
2028 
2029     /* open and grab the lock -- but if we would block, just skip this time */
2030     fd = open(prom_report_fname, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
2031     if (fd == -1) {
2032         syslog(LOG_ERR, "open(%s): %m - %s",
2033                         prom_report_fname,
2034                         "disabling master prometheus report until next reload");
2035         prom_enabled = 0;
2036         return;
2037     }
2038     r = lock_setlock(fd, /*ex*/ 1, /*nb*/ 1, prom_report_fname);
2039     if (r == -1) {
2040         if (errno != EWOULDBLOCK) {
2041             syslog(LOG_ERR, "lock_setlock(%s): %m - %s",
2042                             prom_report_fname,
2043                             "disabling master prometheus report until next reload");
2044             prom_enabled = 0;
2045         }
2046         return;
2047     }
2048 
2049     /* okay, now prepare the report */
2050     syslog(LOG_DEBUG, "updating prometheus report for master process");
2051     last_updated = now_ms();
2052 
2053     buf_printf(&report, "# HELP %s %s\n",
2054                         "cyrus_master_ready_workers",
2055                         "The number of ready workers");
2056     buf_appendcstr(&report, "# TYPE cyrus_master_ready_workers gauge\n");
2057     for (i = 0; i < nservices; i++) {
2058         const struct service *s = &Services[i];
2059         buf_printf(&report, "cyrus_master_ready_workers{service=\"%s\",family=\"%s\"}",
2060                             s->name, s->familyname);
2061         buf_printf(&report, " %d %" PRId64 "\n",
2062                             s->ready_workers, last_updated);
2063     }
2064 
2065     buf_printf(&report, "# HELP %s %s\n",
2066                         "cyrus_master_forks_total",
2067                         "The number of children spawned");
2068     buf_appendcstr(&report, "# TYPE cyrus_master_forks_total counter\n");
2069     for (i = 0; i < nservices; i++) {
2070         const struct service *s = &Services[i];
2071         buf_printf(&report, "cyrus_master_forks_total{service=\"%s\",family=\"%s\"}",
2072                             s->name, s->familyname);
2073         buf_printf(&report, " %d %" PRId64 "\n",
2074                             s->nforks, last_updated);
2075     }
2076 
2077     buf_printf(&report, "# HELP %s %s\n",
2078                         "cyrus_master_active_children",
2079                         "The number of children servicing clients");
2080     buf_appendcstr(&report, "# TYPE cyrus_master_active_children gauge\n");
2081     for (i = 0; i < nservices; i++) {
2082         const struct service *s = &Services[i];
2083         buf_printf(&report, "cyrus_master_active_children{service=\"%s\",family=\"%s\"}",
2084                             s->name, s->familyname);
2085         buf_printf(&report, " %d %" PRId64 "\n",
2086                             s->nactive, last_updated);
2087     }
2088 
2089     buf_printf(&report, "# HELP %s %s\n",
2090                         "cyrus_master_max_children",
2091                         "The maximum number of child processes");
2092     buf_appendcstr(&report, "# TYPE cyrus_master_max_children gauge\n");
2093     for (i = 0; i < nservices; i++) {
2094         const struct service *s = &Services[i];
2095         buf_printf(&report, "cyrus_master_max_children{service=\"%s\",family=\"%s\"}",
2096                             s->name, s->familyname);
2097         buf_printf(&report, " %d %" PRId64 "\n",
2098                             s->max_workers, last_updated);
2099     }
2100 
2101     /* XXX what is nconnections? */
2102 
2103     buf_printf(&report, "# HELP %s %s\n",
2104                         "cyrus_master_forks_per_second",
2105                         "The rate at which we're spawning children");
2106     buf_appendcstr(&report, "# TYPE cyrus_master_forks_per_second gauge\n");
2107     for (i = 0; i < nservices; i++) {
2108         const struct service *s = &Services[i];
2109         buf_printf(&report, "cyrus_master_forks_per_second{service=\"%s\",family=\"%s\"}",
2110                             s->name, s->familyname);
2111         buf_printf(&report, " %g %" PRId64 "\n",
2112                             s->forkrate, last_updated);
2113     }
2114 
2115     buf_printf(&report, "# HELP %s %s\n",
2116                         "cyrus_master_max_forks_per_second",
2117                         "The maximum rate at which we will spawn children");
2118     buf_appendcstr(&report, "# TYPE cyrus_master_max_forks_per_second gauge\n");
2119     for (i = 0; i < nservices; i++) {
2120         const struct service *s = &Services[i];
2121         buf_printf(&report, "cyrus_master_max_forks_per_second{service=\"%s\",family=\"%s\"}",
2122                             s->name, s->familyname);
2123         buf_printf(&report, " %u %" PRId64 "\n",
2124                             s->maxforkrate, last_updated);
2125     }
2126 
2127     buf_printf(&report, "# HELP %s %s\n",
2128                         "cyrus_master_ready_fails_total",
2129                         "The number of failures in READY state");
2130     buf_appendcstr(&report, "# TYPE cyrus_master_ready_fails_total counter\n");
2131     for (i = 0; i < nservices; i++) {
2132         const struct service *s = &Services[i];
2133         buf_printf(&report, "cyrus_master_ready_fails_total{service=\"%s\",family=\"%s\"}",
2134                             s->name, s->familyname);
2135         buf_printf(&report, " %d %" PRId64 "\n",
2136                             s->nreadyfails, last_updated);
2137     }
2138 
2139     /* write it out */
2140     retry_write(fd, buf_cstring(&report), buf_len(&report));
2141     ftruncate(fd, buf_len(&report));
2142     lock_unlock(fd, prom_report_fname);
2143     close(fd);
2144 
2145     prom_prev_report = now;
2146 
2147     buf_free(&report);
2148 }
2149 
reread_conf(struct timeval now)2150 static void reread_conf(struct timeval now)
2151 {
2152     int i,j;
2153     struct event *ptr;
2154     struct centry *c;
2155 
2156     /* disable all services -
2157        they will be re-enabled if they appear in config file */
2158     for (i = 0; i < nservices; i++) service_forget_exec(&Services[i]);
2159 
2160     /* read services */
2161     masterconf_getsection("SERVICES", &add_service, (void*) 1);
2162     masterconf_getsection("DAEMON", &add_daemon, (void *)1);
2163 
2164     for (i = 0; i < nservices; i++) {
2165         /* Send SIGHUP to all children:
2166          *  - for services being added, there are still no children
2167          *  - for services being disabled, we need to terminate the children
2168          *  - otherwise (remaining services) we want to recycle children
2169          * Note that for services being disabled, it is important to first
2170          * signal them before shutting down their socket.
2171          */
2172         for (j = 0 ; j < child_table_size ; j++ ) {
2173             c = ctable[j];
2174             while (c != NULL) {
2175                 if ((c->si == i) &&
2176                     (c->service_state != SERVICE_STATE_DEAD)) {
2177                     kill(c->pid, SIGHUP);
2178                     c->sighuptime = time(NULL);
2179                 }
2180                 c = c->next;
2181             }
2182         }
2183 
2184         if (!Services[i].exec && (Services[i].socket >= 0)) {
2185             /* cleanup newly disabled services */
2186 
2187             if (verbose > 2)
2188                 syslog(LOG_DEBUG, "disable: service %s/%s socket %d pipe %d %d",
2189                        Services[i].name, Services[i].familyname,
2190                        Services[i].socket,
2191                        Services[i].stat[0], Services[i].stat[1]);
2192 
2193             /* Only free the service info on the primary */
2194             if(Services[i].associate == 0) {
2195                 free(Services[i].listen);
2196                 free(Services[i].proto);
2197             }
2198             Services[i].listen = NULL;
2199             Services[i].proto = NULL;
2200             Services[i].desired_workers = 0;
2201 
2202             /* close all listeners */
2203             shutdown(Services[i].socket, SHUT_RDWR);
2204             xclose(Services[i].socket);
2205         }
2206         else if (Services[i].exec && (Services[i].socket < 0)) {
2207             /* initialize new services */
2208 
2209             service_create(&Services[i], 0);
2210             if (verbose > 2)
2211                 syslog(LOG_DEBUG, "init: service %s/%s socket %d pipe %d %d",
2212                        Services[i].name, Services[i].familyname,
2213                        Services[i].socket,
2214                        Services[i].stat[0], Services[i].stat[1]);
2215         }
2216     }
2217 
2218     /* remove existing events */
2219     while (schedule) {
2220         ptr = schedule;
2221         schedule = schedule->next;
2222         event_free(ptr);
2223     }
2224     schedule = NULL;
2225 
2226     /* read events */
2227     masterconf_getsection("EVENTS", &add_event, (void*) 1);
2228 
2229     /* reinit child janitor */
2230     init_janitor(now);
2231 
2232     /* reinit prom report */
2233     init_prom_report(now);
2234 
2235     /* send some feedback to admin */
2236     syslog(LOG_NOTICE,
2237             "Services reconfigured. %d out of %d (max %d) services structures are now in use",
2238             nservices, allocservices, SERVICE_MAX);
2239 }
2240 
main(int argc,char ** argv)2241 int main(int argc, char **argv)
2242 {
2243     static const char lock_suffix[] = ".lock";
2244 
2245     const char *pidfile = MASTER_PIDFILE;
2246     char *pidfile_lock = NULL;
2247 
2248     int startup_pipe[2] = { -1, -1 };
2249     int pidlock_fd = -1;
2250 
2251     int i, opt, close_std = 1, daemon_mode = 0;
2252     const char *error_log = NULL;
2253     extern char *optarg;
2254 
2255     char *alt_config = NULL;
2256 
2257     int fd;
2258     fd_set rfds;
2259     char *p = NULL;
2260     int r = 0;
2261 
2262     struct timeval now;
2263 
2264     p = getenv("CYRUS_VERBOSE");
2265     if (p) verbose = atoi(p) + 1;
2266     while ((opt = getopt(argc, argv, "C:L:M:p:l:Ddj:vV")) != EOF) {
2267         switch (opt) {
2268         case 'C': /* alt imapd.conf file */
2269             alt_config = optarg;
2270             break;
2271         case 'M': /* alt cyrus.conf file */
2272             MASTER_CONFIG_FILENAME = optarg;
2273             break;
2274         case 'l':
2275             /* user defined listen queue backlog */
2276             listen_queue_backlog = atoi(optarg);
2277             break;
2278         case 'p':
2279             /* Set the pidfile name */
2280             pidfile = optarg;
2281             break;
2282         case 'd':
2283             /* Daemon Mode */
2284             daemon_mode = 1;
2285             break;
2286         case 'D':
2287             /* Debug Mode */
2288             close_std = 0;
2289             break;
2290         case 'L':
2291             /* error log */
2292             error_log = optarg;
2293             break;
2294         case 'j':
2295             /* Janitor frequency */
2296             janitor_frequency = atoi(optarg);
2297             if(janitor_frequency < 1)
2298                 fatal("The janitor period must be at least 1 second", EX_CONFIG);
2299             break;
2300         case 'v':
2301             verbose++;
2302             break;
2303         case 'V':
2304             /* print version information and exit */
2305             printf("%s %s\n", PACKAGE_NAME, CYRUS_VERSION);
2306             return 0;
2307         default:
2308             break;
2309         }
2310     }
2311 
2312     if (daemon_mode && !close_std)
2313         fatal("Unable to be both debug and daemon mode", EX_CONFIG);
2314 
2315     /* we reserve fds for children to communicate with us, so they
2316        better be available. */
2317     for (fd = STATUS_FD; fd <= LISTEN_FD; fd++) {
2318         close(fd);
2319         if (dup(0) != fd) fatalf(2, "couldn't dup fd 0: %m");
2320     }
2321 
2322     masterconf_init("master", alt_config);
2323 
2324     if (close_std || error_log) {
2325         /* close stdin/out/err */
2326         for (fd = 0; fd < 3; fd++) {
2327             const char *file = (error_log && fd > 0 ?
2328                                 error_log : "/dev/null");
2329             int mode = (fd > 0 ? O_WRONLY : O_RDWR) |
2330                        (error_log && fd > 0 ? O_CREAT|O_APPEND : 0);
2331             close(fd);
2332             if (open(file, mode, 0666) != fd)
2333                 fatalf(2, "couldn't open %s: %m", file);
2334         }
2335     }
2336 
2337     /* Pidfile Algorithm in Daemon Mode.  This is a little subtle because
2338      * we want to ensure that we can report an error to our parent if the
2339      * child fails to lock the pidfile.
2340      *
2341      * [A] Create/lock pidfile.lock.  If locked, exit(failure).
2342      * [A] Create a pipe
2343      * [A] Fork [B]
2344      * [A] Block on reading exit code from pipe
2345      * [B] Create/lock pidfile.  If locked, write failure code to pipe and
2346      *     exit(failure)
2347      * [B] write pid to pidfile
2348      * [B] write success code to pipe & finish starting up
2349      * [A] unlink pidfile.lock and exit(code read from pipe)
2350      *
2351      */
2352     if(daemon_mode) {
2353         /* Daemonize */
2354         pid_t pid = -1;
2355 
2356         pidfile_lock = strconcat(pidfile, lock_suffix, (char *)NULL);
2357 
2358         pidlock_fd = open(pidfile_lock, O_CREAT|O_TRUNC|O_RDWR, 0644);
2359         if(pidlock_fd == -1) {
2360             syslog(LOG_ERR, "can't open pidfile lock: %s (%m)", pidfile_lock);
2361             exit(EX_OSERR);
2362         } else {
2363             if(lock_nonblocking(pidlock_fd, pidfile)) {
2364                 syslog(LOG_ERR, "can't get exclusive lock on %s",
2365                        pidfile_lock);
2366                 exit(EX_TEMPFAIL);
2367             }
2368         }
2369 
2370         if(pipe(startup_pipe) == -1) {
2371             syslog(LOG_ERR, "can't create startup pipe (%m)");
2372             exit(EX_OSERR);
2373         }
2374 
2375         /* Set the current working directory where cores can go to die. */
2376         const char *path = config_getstring(IMAPOPT_CONFIGDIRECTORY);
2377         if (path == NULL) {
2378                 path = getenv("TMPDIR");
2379                 if (path == NULL)
2380                         path = "/tmp";
2381         }
2382         if (chdir(path))
2383             fatalf(2, "couldn't chdir to %s: %m", path);
2384         r = chdir("cores");
2385 
2386         do {
2387             pid = fork();
2388 
2389             if ((pid == -1) && (errno == EAGAIN)) {
2390                 syslog(LOG_WARNING, "master fork failed (sleeping): %m");
2391                 sleep(5);
2392             }
2393         } while ((pid == -1) && (errno == EAGAIN));
2394 
2395         if (pid == -1) {
2396             fatal("fork error", EX_OSERR);
2397         } else if (pid != 0) {
2398             int exit_code;
2399 
2400             /* Parent, wait for child */
2401             if(read(startup_pipe[0], &exit_code, sizeof(exit_code)) == -1) {
2402                 syslog(LOG_ERR, "could not read from startup_pipe (%m)");
2403                 unlink(pidfile_lock);
2404                 exit(EX_OSERR);
2405             } else {
2406                 unlink(pidfile_lock);
2407                 exit(exit_code);
2408             }
2409         }
2410 
2411         /* Child! */
2412         close(startup_pipe[0]);
2413 
2414         free(pidfile_lock);
2415 
2416         /*
2417          * We're now running in the child. Lose our controlling terminal
2418          * and obtain a new process group.
2419          */
2420         if (setsid() == -1) {
2421             int exit_result = EX_OSERR;
2422 
2423             /* Tell our parent that we failed. */
2424             if (write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2425                 syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2426             }
2427 
2428             fatal("setsid failure", EX_OSERR);
2429         }
2430     }
2431 
2432     /* Write out the pidfile */
2433     pidfd = open(pidfile, O_CREAT|O_RDWR, 0644);
2434     if(pidfd == -1) {
2435         int exit_result = EX_OSERR;
2436 
2437         syslog(LOG_ERR, "can't open pidfile: %m");
2438 
2439         /* Tell our parent that we failed. */
2440         if (daemon_mode && write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2441             syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2442         }
2443 
2444         exit(EX_OSERR);
2445     } else {
2446         char buf[100];
2447 
2448         if(lock_nonblocking(pidfd, pidfile)) {
2449             int exit_result = EX_OSERR;
2450 
2451             /* Tell our parent that we failed. */
2452             if (write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2453                 syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2454             }
2455 
2456             fatal("cannot get exclusive lock on pidfile (is another master still running?)", EX_OSERR);
2457         } else {
2458             int pidfd_flags = fcntl(pidfd, F_GETFD, 0);
2459             if (pidfd_flags != -1)
2460                 pidfd_flags = fcntl(pidfd, F_SETFD,
2461                                     pidfd_flags | FD_CLOEXEC);
2462             if (pidfd_flags == -1) {
2463                 int exit_result = EX_OSERR;
2464 
2465                 syslog(LOG_ERR, "unable to set close-on-exec for pidfile: %m");
2466 
2467                 /* Tell our parent that we failed. */
2468                 if (write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2469                     syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2470                 }
2471 
2472                 fatalf(EX_OSERR, "unable to set close-on-exec for pidfile (see syslog for details)");
2473             }
2474 
2475             /* Write PID */
2476             snprintf(buf, sizeof(buf), "%lu\n", (unsigned long int)getpid());
2477             if(lseek(pidfd, 0, SEEK_SET) == -1 ||
2478                ftruncate(pidfd, 0) == -1 ||
2479                write(pidfd, buf, strlen(buf)) == -1) {
2480                 int exit_result = EX_OSERR;
2481 
2482                 syslog(LOG_ERR, "unable to write to pidfile: %m");
2483 
2484                 /* Tell our parent that we failed. */
2485                 if (daemon_mode && write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2486                     syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2487                 }
2488 
2489                 fatalf(EX_OSERR, "unable to write to pidfile (see syslog for details)");
2490             }
2491             if (fsync(pidfd))
2492                 fatalf(EX_OSERR, "unable to sync pidfile: %m");
2493         }
2494     }
2495 
2496     if(daemon_mode) {
2497         int exit_result = 0;
2498 
2499         /* success! */
2500         if (write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1)
2501             fatalf(EX_OSERR,
2502                    "could not write success result to startup pipe (%m)");
2503 
2504         close(startup_pipe[1]);
2505         xclose(pidlock_fd);
2506     }
2507 
2508     syslog(LOG_DEBUG, "process started");
2509 
2510 #if defined(__linux__) && defined(HAVE_LIBCAP)
2511     if (become_cyrus(/*is_master*/1) != 0) {
2512         syslog(LOG_ERR, "can't change to the cyrus user: %m");
2513         exit(1);
2514     }
2515 #endif
2516 
2517     masterconf_getsection("START", &add_start, NULL);
2518     masterconf_getsection("SERVICES", &add_service, NULL);
2519     masterconf_getsection("EVENTS", &add_event, NULL);
2520     masterconf_getsection("DAEMON", &add_daemon, NULL);
2521 
2522     /* set signal handlers */
2523     sighandler_setup();
2524 
2525     /* initialize services */
2526     for (i = 0; i < nservices; i++) {
2527         service_create(&Services[i], 1);
2528         if (verbose > 2)
2529             syslog(LOG_DEBUG, "init: service %s/%s socket %d pipe %d %d",
2530                    Services[i].name, Services[i].familyname,
2531                    Services[i].socket,
2532                    Services[i].stat[0], Services[i].stat[1]);
2533     }
2534 
2535 #if !defined(__linux__) || !defined(HAVE_LIBCAP)
2536     if (become_cyrus(/*is_master*/1) != 0) {
2537         syslog(LOG_ERR, "can't change to the cyrus user: %m");
2538         exit(1);
2539     }
2540 #endif
2541 
2542     /* init ctable janitor */
2543     gettimeofday(&now, 0);
2544     init_janitor(now);
2545 
2546     /* init prom report */
2547     init_prom_report(now);
2548 
2549     /* ok, we're going to start spawning like mad now */
2550     syslog(LOG_DEBUG, "ready for work");
2551 
2552     for (;;) {
2553         int i, maxfd, ready_fds, total_children = 0;
2554         struct timeval tv, *tvptr;
2555         struct notify_message msg;
2556 
2557         if (gotsigquit) {
2558             gotsigquit = 0;
2559             begin_shutdown();
2560         }
2561 
2562         /* run any scheduled processes */
2563         if (!in_shutdown)
2564             spawn_schedule(now);
2565 
2566         /* reap first, that way if we need to babysit we will */
2567         if (gotsigchld) {
2568             /* order matters here */
2569             gotsigchld = 0;
2570             reap_child();
2571         }
2572 
2573         /* do we have any services undermanned? */
2574         for (i = 0; i < nservices; i++) {
2575             total_children += Services[i].nactive;
2576             if (!in_shutdown) {
2577                 if (Services[i].exec /* enabled */ &&
2578                     (Services[i].nactive < Services[i].max_workers) &&
2579                     (Services[i].ready_workers < Services[i].desired_workers))
2580                 {
2581                     /* bring us up to desired_workers */
2582                     int j = Services[i].desired_workers - Services[i].ready_workers;
2583 
2584                     if (verbose) {
2585                         syslog(LOG_DEBUG, "service %s/%s needs %d more ready workers",
2586                             Services[i].name, Services[i].familyname, j);
2587                     }
2588 
2589                     while (j-- > 0) {
2590                         spawn_service(i);
2591                     }
2592                 } else if (Services[i].exec
2593                           && Services[i].babysit
2594                           && Services[i].nactive == 0) {
2595                     syslog(LOG_ERR,
2596                           "lost all children for service: %s/%s.  " \
2597                           "Applying babysitter.",
2598                           Services[i].name, Services[i].familyname);
2599                     spawn_service(i);
2600                 } else if (!Services[i].exec /* disabled */ &&
2601                           Services[i].name /* not yet removed */ &&
2602                           Services[i].nactive == 0) {
2603                     if (verbose > 2)
2604                         syslog(LOG_DEBUG, "remove: service %s/%s pipe %d %d",
2605                               Services[i].name, Services[i].familyname,
2606                               Services[i].stat[0], Services[i].stat[1]);
2607 
2608                     /* Only free the service info on the primary */
2609                     if (Services[i].associate == 0) {
2610                         free(Services[i].name);
2611                     }
2612                     Services[i].name = NULL;
2613                     Services[i].nforks = 0;
2614                     Services[i].nactive = 0;
2615                     Services[i].nconnections = 0;
2616                     Services[i].associate = 0;
2617 
2618                     xclose(Services[i].stat[0]);
2619                     xclose(Services[i].stat[1]);
2620                 }
2621             }
2622         }
2623 
2624         if (in_shutdown && total_children == 0) {
2625            syslog(LOG_NOTICE, "All children have exited, closing down");
2626            exit(0);
2627         }
2628 
2629         if (gotsighup) {
2630             syslog(LOG_NOTICE, "got SIGHUP");
2631             gotsighup = 0;
2632             reread_conf(now);
2633         }
2634 
2635         FD_ZERO(&rfds);
2636         maxfd = 0;
2637         for (i = 0; i < nservices; i++) {
2638             int x = Services[i].stat[0];
2639 
2640             int y = Services[i].socket;
2641 
2642             /* messages */
2643             if (x >= 0) {
2644                 if (verbose > 2)
2645                     syslog(LOG_DEBUG, "listening for messages from %s/%s",
2646                            Services[i].name, Services[i].familyname);
2647                 FD_SET(x, &rfds);
2648             }
2649             if (x > maxfd) maxfd = x;
2650 
2651             /* connections */
2652             if (y >= 0 && Services[i].ready_workers == 0 &&
2653                 Services[i].nactive < Services[i].max_workers &&
2654                 !service_is_fork_limited(&Services[i])) {
2655                 if (verbose > 2)
2656                     syslog(LOG_DEBUG, "listening for connections for %s/%s",
2657                            Services[i].name, Services[i].familyname);
2658                 FD_SET(y, &rfds);
2659                 if (y > maxfd) maxfd = y;
2660             }
2661 
2662             /* paranoia */
2663             if (Services[i].ready_workers < 0) {
2664                 syslog(LOG_ERR, "%s/%s has %d workers?!?", Services[i].name,
2665                        Services[i].familyname, Services[i].ready_workers);
2666             }
2667         }
2668         maxfd++;                /* need 1 greater than maxfd */
2669 
2670         int interrupted = 0;
2671         do {
2672             /* how long to wait? - do now so that any scheduled wakeup
2673             * calls get accounted for*/
2674             gettimeofday(&now, 0);
2675             tvptr = NULL;
2676             if (schedule && !in_shutdown) {
2677                 double delay = timesub(&now, &schedule->mark);
2678                 if (!interrupted && delay > 0.0) {
2679                     timeval_set_double(&tv, delay);
2680                 }
2681                 else {
2682                     tv.tv_sec = 0;
2683                     tv.tv_usec = 0;
2684                 }
2685                 tvptr = &tv;
2686             }
2687 
2688             errno = 0;
2689             ready_fds = myselect(maxfd, &rfds, NULL, NULL, tvptr);
2690 
2691             if (ready_fds < 0) {
2692                 switch (errno) {
2693                 case EAGAIN:
2694                 case EINTR:
2695                     /* Try again to get valid rfds, this time without blocking so we
2696                      * will definitely process messages without getting interrupted
2697                      * again. */
2698                     interrupted++;
2699                     if (interrupted > 5) {
2700                         syslog(LOG_WARNING, "Repeatedly interrupted, too many signals?");
2701                         /* Fake a timeout */
2702                         ready_fds = 0;
2703                         FD_ZERO(&rfds);
2704                     }
2705                     break;
2706                 default:
2707                     /* uh oh */
2708                     fatalf(1, "select failed: %m");
2709                 }
2710             }
2711         } while (!in_shutdown && ready_fds < 0);
2712 
2713         if (ready_fds > 0) {
2714             for (i = 0; i < nservices; i++) {
2715                 int x = Services[i].stat[0];
2716                 int y = Services[i].socket;
2717 
2718                 if ((x >= 0) && FD_ISSET(x, &rfds)) {
2719                     while ((r = read_msg(x, &msg)) == 0)
2720                         process_msg(i, &msg);
2721 
2722                     if (r == 2) {
2723                         syslog(LOG_ERR,
2724                             "got incorrectly sized response from child: %x", i);
2725                         continue;
2726                     }
2727                     if (r < 0) {
2728                         syslog(LOG_ERR,
2729                             "error while receiving message from child %x: %m", i);
2730                         continue;
2731                     }
2732                 }
2733 
2734                 if (!in_shutdown && Services[i].exec &&
2735                     Services[i].nactive < Services[i].max_workers &&
2736                     Services[i].ready_workers == 0 &&
2737                     y >= 0 && FD_ISSET(y, &rfds))
2738                 {
2739                     /* huh, someone wants to talk to us */
2740                     spawn_service(i);
2741                 }
2742             }
2743         }
2744 
2745         gettimeofday(&now, 0);
2746         child_janitor(now);
2747         do_prom_report(now);
2748     }
2749 
2750     /* never reached */
2751     return r;
2752 }
2753