1 /* master.c -- IMAP master process to handle recovery, checkpointing, spawning
2 *
3 * Copyright (c) 1994-2008 Carnegie Mellon University. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. The name "Carnegie Mellon University" must not be used to
18 * endorse or promote products derived from this software without
19 * prior written permission. For permission or any legal
20 * details, please contact
21 * Carnegie Mellon University
22 * Center for Technology Transfer and Enterprise Creation
23 * 4615 Forbes Avenue
24 * Suite 302
25 * Pittsburgh, PA 15213
26 * (412) 268-7393, fax: (412) 268-7395
27 * innovation@andrew.cmu.edu
28 *
29 * 4. Redistributions of any form whatsoever must retain the following
30 * acknowledgment:
31 * "This product includes software developed by Computing Services
32 * at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33 *
34 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41 */
42
43 #include <config.h>
44
45 #include <stdio.h>
46 #include <stdint.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <time.h>
50 #include <sys/time.h>
51 #include <sys/types.h>
52 #include <sys/wait.h>
53 #ifdef HAVE_UNISTD_H
54 #include <unistd.h>
55 #endif
56 #ifdef HAVE_SYS_RESOURCE_H
57 #include <sys/resource.h>
58 #endif
59 #include <fcntl.h>
60 #include <signal.h>
61 #include <sys/param.h>
62 #include <sys/stat.h>
63 #include <syslog.h>
64 #include <netdb.h>
65 #include <sys/socket.h>
66 #include <netinet/in.h>
67 #include <sys/un.h>
68 #include <arpa/inet.h>
69 #include <sysexits.h>
70 #include <errno.h>
71 #include <limits.h>
72 #include <math.h>
73 #include <inttypes.h>
74
/* Fallback for platforms whose headers do not define PATH_MAX. */
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif

/* Fallback for headers that do not define INADDR_NONE. */
#ifndef INADDR_NONE
#define INADDR_NONE 0xffffffff
#endif

/* Fallback for headers that do not define INADDR_ANY. */
#ifndef INADDR_ANY
#define INADDR_ANY 0x00000000
#endif

/* Where only IPV6_BINDV6ONLY is defined, use it under the name IPV6_V6ONLY. */
#if !defined(IPV6_V6ONLY) && defined(IPV6_BINDV6ONLY)
#define IPV6_V6ONLY IPV6_BINDV6ONLY
#endif
90
91 #include "masterconf.h"
92
93 #include "master.h"
94 #include "service.h"
95
96 #include "cyr_lock.h"
97 #include "retry.h"
98 #include "util.h"
99 #include "xmalloc.h"
100 #include "strarray.h"
101
/* Sizing constants for the child process table (ctable[]). */
enum {
    child_table_size = 10000,   /* number of hash buckets in ctable[] */
    child_table_inc = 100       /* NOTE(review): no use visible in this part of the file */
};
106
/* Debug level; LOG_DEBUG chatter in this file is gated on it. */
static int verbose = 0;
/* Backlog argument handed to listen() for tcp services. */
static int listen_queue_backlog = 32;
/* Descriptor that forked children close to release the pidfile lock. */
static int pidfd = -1;

/* While non-zero, reap_child() does not treat abnormal exits as failures. */
static int in_shutdown = 0;

/* Path of the master configuration file; starts out as the built-in default. */
const char *MASTER_CONFIG_FILENAME = DEFAULT_MASTER_CONFIG_FILENAME;
114
/* Value of centry.si for a child that is not tied to a Services[] slot. */
#define SERVICE_NONE (-1)
/* Upper bound on the number of Services[] slots.  Parenthesized so the
 * macro behaves as a single value inside larger expressions. */
#define SERVICE_MAX (INT_MAX - 10)
/* Yield x, or the literal "unknown" when x is NULL (for log messages).
 * Note that x is evaluated twice. */
#define SERVICEPARAM(x) ((x) ? (x) : "unknown")
118
/* reap_child() disables a service once MAX_READY_FAILS children have died
 * abnormally in READY state; the count restarts whenever more than
 * MAX_READY_FAIL_INTERVAL seconds separate two such failures. */
#define MAX_READY_FAILS 5
#define MAX_READY_FAIL_INTERVAL 10 /* 10 seconds */

/* File-name components for the Prometheus report written by master. */
#define FNAME_PROM_STATS_DIR "/stats" /* keep in sync with prometheus.h */
#define FNAME_PROM_MASTER_REPORT "master.txt"
124
/* Growable array of configured services; slots are handed out by service_add(). */
struct service *Services = NULL;
/* Number of slots currently allocated in Services[]. */
static int allocservices = 0;
/* Number of slots of Services[] that are in use. */
int nservices = 0;
128
/* One entry of the event schedule, a singly linked list headed by
 * 'schedule' and maintained by schedule_event() / spawn_schedule(). */
struct event {
    char *name;             /* label used in log messages; must not be NULL when scheduled */
    struct timeval mark;    /* time at which the event is due */
    time_t period;          /* seconds between runs; 0 means the event is freed after it fires */
    int hour;               /* local-time hour a non-periodic event is meant to fire at */
    int min;                /* local-time minute a non-periodic event is meant to fire at */
    int periodic;           /* non-zero: next run is scheduled at "now + period" */
    strarray_t *exec;       /* command line to exec; NULL for a pure wake-up event */
    struct event *next;     /* next event in the list */
};
static struct event *schedule = NULL;
140
/* Lifecycle state that master tracks for each child (centry.service_state). */
enum sstate {
    SERVICE_STATE_UNKNOWN = 0,  /* state not known */
    SERVICE_STATE_INIT    = 1,  /* Service forked - UNUSED */
    SERVICE_STATE_READY   = 2,  /* Service told us it is ready */
                                /* or it just forked and has not
                                 * talked to us yet */
    SERVICE_STATE_BUSY    = 3,  /* Service told us it is not ready */
    SERVICE_STATE_DEAD    = 4   /* We received a sigchld from this service */
};
150
/* Per-child bookkeeping record.  Records live in ctable[], a hash table
 * indexed by "pid % child_table_size" with collisions chained via 'next'. */
struct centry {
    pid_t pid;
    enum sstate service_state;  /* SERVICE_STATE_* */
    time_t janitor_deadline;    /* cleanup deadline */
    int si;                     /* Services[] index */
    char *desc;                 /* human readable description for logging */
    struct timeval spawntime;   /* when the centry was allocated */
    time_t sighuptime;          /* when did we send a SIGHUP */
    struct centry *next;
};
static struct centry *ctable[child_table_size];
162
/* State of the janitor that sweeps the child table. */
static int janitor_frequency = 1;       /* Janitor sweeps per second */
static int janitor_position;            /* Entry to begin at in next sweep */
static struct timeval janitor_mark;     /* Last time janitor did a sweep */

/* State for the Prometheus report; the code using it lies outside this
 * part of the file, so the comments below are inferred from the names. */
static int prom_enabled = 0;            /* presumably: reporting switched on */
static int prom_frequency = 0;          /* presumably: reporting interval */
static struct timeval prom_prev_report = { 0, 0 };  /* presumably: time of last report */
static char *prom_report_fname = NULL;  /* presumably: path of the report file */
171
/* Forward declarations for functions used before their definitions. */
#ifdef HAVE_SETRLIMIT
static void limit_fds(rlim_t);
#endif
static void schedule_event(struct event *a);
static void child_sighandler_setup(void);

#if HAVE_PSELECT
/* Signal mask handed to pselect() by myselect(). */
static sigset_t pselect_sigmask;
#endif
181
/*
 * select() wrapper used by the master loop.
 *
 * Where pselect() is available it is called with pselect_sigmask, after
 * converting the optional timeout from a timeval to a timespec; a NULL
 * 'tout' stays NULL.  Otherwise plain select() is used.
 *
 * Returns whatever the underlying call returns.
 */
static int myselect(int nfds, fd_set *rfds, fd_set *wfds,
                    fd_set *efds, struct timeval *tout)
{
#if HAVE_PSELECT
    /* pselect() closes the race between SIGCHLD arriving
     * and select() sleeping for up to 10 seconds. */
    struct timespec limit;
    struct timespec *limitp = NULL;

    if (tout != NULL) {
        limit.tv_sec = tout->tv_sec;
        limit.tv_nsec = tout->tv_usec * 1000;
        limitp = &limit;
    }

    return pselect(nfds, rfds, wfds, efds, limitp, &pselect_sigmask);
#else
    int nready = select(nfds, rfds, wfds, efds, tout);

    return nready;
#endif
}
200
fatal(const char * msg,int code)201 EXPORTED void fatal(const char *msg, int code)
202 {
203 syslog(LOG_CRIT, "%s", msg);
204 syslog(LOG_NOTICE, "exiting");
205 exit(code);
206 }
207
event_free(struct event * a)208 static void event_free(struct event *a)
209 {
210 if (a->exec) {
211 strarray_free(a->exec);
212 a->exec = NULL;
213 }
214 free(a->name);
215 free(a);
216 }
217
get_daemon(char * path,size_t size,const strarray_t * cmd)218 static void get_daemon(char *path, size_t size, const strarray_t *cmd)
219 {
220 if (!size) return;
221 if (cmd->data[0][0] == '/') {
222 /* master lacks strlcpy, due to no libcyrus */
223 strncpy(path, cmd->data[0], size - 1);
224 }
225 else snprintf(path, size, "%s/%s", LIBEXEC_DIR, cmd->data[0]);
226 path[size-1] = '\0';
227 }
228
get_prog(char * path,size_t size,const strarray_t * cmd)229 static void get_prog(char *path, size_t size, const strarray_t *cmd)
230 {
231 if (!size) return;
232 if (cmd->data[0][0] == '/') {
233 /* master lacks strlcpy, due to no libcyrus */
234 strncpy(path, cmd->data[0], size - 1);
235 }
236 else snprintf(path, size, "%s/%s", SBIN_DIR, cmd->data[0]);
237 path[size-1] = '\0';
238 }
239
/*
 * Resolve cmd's program to a path: the get_daemon() location is tried
 * first and kept if stat() succeeds on it; otherwise 'path' is left
 * holding the get_prog() location, whether or not that exists.
 */
static void get_executable(char *path, size_t size, const strarray_t *cmd)
{
    struct stat sb;

    if (size == 0)
        return;

    get_daemon(path, size, cmd);
    if (stat(path, &sb) == 0)
        return;

    get_prog(path, size, cmd);
    if (stat(path, &sb) == 0)
        return;

    /* XXX - abort? */
}
251
/*
 * Create the status pipe for a service in filedes[].  The read end,
 * filedes[0], is made non-blocking and close-on-exec.  Any failure is
 * reported through fatalf().
 */
static void get_statsock(int filedes[2])
{
    int flags;

    if (pipe(filedes) != 0)
        fatalf(1, "couldn't create status socket: %m");

    /* we don't want the master blocking on reads */
    flags = fcntl(filedes[0], F_GETFL, 0);
    if (flags == -1
        || fcntl(filedes[0], F_SETFL, flags | O_NONBLOCK) == -1) {
        fatalf(1, "unable to set non-blocking: %m");
    }

    /* we don't want the services to be able to read from it */
    flags = fcntl(filedes[0], F_GETFD, 0);
    if (flags == -1
        || fcntl(filedes[0], F_SETFD, flags | FD_CLOEXEC) == -1) {
        fatalf(1, "unable to set close-on-exec: %m");
    }
}
273
cyrus_cap_bind(int socket,struct sockaddr * addr,socklen_t length)274 static int cyrus_cap_bind(int socket, struct sockaddr *addr, socklen_t length)
275 {
276 int r;
277
278 set_caps(BEFORE_BIND, /*is_master*/1);
279 r = bind(socket, addr, length);
280 set_caps(AFTER_BIND, /*is_master*/1);
281
282 return r;
283 }
284
285 /* Return a new 'centry', by malloc'ing it. */
centry_alloc(void)286 static struct centry *centry_alloc(void)
287 {
288 struct centry *t;
289
290 t = xzmalloc(sizeof(*t));
291 t->si = SERVICE_NONE;
292 gettimeofday(&t->spawntime, NULL);
293 t->sighuptime = (time_t)-1;
294
295 return t;
296 }
297
centry_set_name(struct centry * c,const char * type,const char * name,const char * path)298 static void centry_set_name(struct centry *c, const char *type,
299 const char *name, const char *path)
300 {
301 free(c->desc);
302 if (name && path)
303 c->desc = strconcat("type:", type, " name:", name, " path:", path, NULL);
304 else
305 c->desc = strconcat("type:", type, NULL);
306 }
307
centry_describe(const struct centry * c,pid_t pid)308 static char *centry_describe(const struct centry *c, pid_t pid)
309 {
310 struct buf desc = BUF_INITIALIZER;
311
312 if (!c) {
313 buf_appendcstr(&desc, "unknown process");
314 }
315 else {
316 struct timeval now;
317 gettimeofday(&now, NULL);
318 buf_printf(&desc, "process %s age:%.3fs",
319 c->desc, timesub(&c->spawntime, &now));
320 }
321 buf_printf(&desc, " pid:%d", (int)pid);
322 return buf_release(&desc);
323 }
324
325 /* free a centry */
centry_free(struct centry * c)326 static void centry_free(struct centry *c)
327 {
328 free(c->desc);
329 free(c);
330 }
331
332 /* add a centry to the global table of all
333 * centries, using the given pid as the key */
centry_add(struct centry * c,pid_t p)334 static void centry_add(struct centry *c, pid_t p)
335 {
336 c->pid = p;
337 c->next = ctable[p % child_table_size];
338 ctable[p % child_table_size] = c;
339 }
340
341 /* find a centry in the global table, using the
342 * given pid as the key. Returns NULL if not
343 * found. */
centry_find(pid_t p)344 static struct centry *centry_find(pid_t p)
345 {
346 struct centry *c;
347
348 c = ctable[p % child_table_size];
349 while (c && c->pid != p)
350 c = c->next;
351 return c;
352 }
353
centry_set_state(struct centry * c,enum sstate state)354 static void centry_set_state(struct centry *c, enum sstate state)
355 {
356 c->service_state = state;
357 if (state == SERVICE_STATE_DEAD)
358 c->janitor_deadline = time(NULL) + 2;
359 }
360
/*
 * Split a "listen" parameter into host and port strings.
 *
 * Accepted shapes:
 *
 *      port                      (text without ':' and without leading '[')
 *      host ':' port             (split at the LAST ':')
 *      '[' address ']'
 *      '[' address ']' ':' port
 *
 * On success returns 0 with *hostp and/or *portp pointing at newly
 * allocated strings the caller must free(); a part that was not given is
 * left NULL.  Returns -1 when a ':' is not followed by a port, when an
 * opening '[' has no matching ']', or when something other than ':'
 * follows the ']'.  Both outputs are reset to NULL on entry.
 */
static int parse_inet_listen(const char *listen,
                             char **hostp, char **portp)
{
    const char *rbracket;
    const char *tail;
    size_t hostlen;

    *hostp = NULL;
    *portp = NULL;

    if (listen[0] != '[') {
        const char *colon = strrchr(listen, ':');

        if (colon == NULL) {
            /* no host specified */
            *portp = xstrdup(listen);
            return 0;
        }
        if (colon[1] == '\0')
            return -1;

        *hostp = xstrndup(listen, (colon - listen));
        *portp = xstrdup(colon + 1);
        return 0;
    }

    /* bracketed address: the host is whatever sits between '[' and ']' */
    rbracket = strrchr(listen, ']');
    if (rbracket == NULL)
        return -1;

    tail = rbracket + 1;
    hostlen = (size_t)(rbracket - listen - 1);

    switch (*tail) {
    case ':':
        if (tail[1] == '\0')
            return -1;
        *hostp = xstrndup(listen + 1, hostlen);
        *portp = xstrdup(tail + 1);
        return 0;

    case '\0':
        /* no port specified */
        *hostp = xstrndup(listen + 1, hostlen);
        return 0;

    default:
        return -1;
    }
}
416
verify_service_file(const strarray_t * filename)417 static int verify_service_file(const strarray_t *filename)
418 {
419 char path[PATH_MAX];
420 struct stat statbuf;
421
422 get_executable(path, sizeof(path), filename);
423 if (stat(path, &statbuf)) return 0;
424 if (! S_ISREG(statbuf.st_mode)) return 0;
425 return statbuf.st_mode & S_IXUSR;
426 }
427
service_forget_exec(struct service * s)428 static void service_forget_exec(struct service *s)
429 {
430 if (s->exec) {
431 /* Only free the service info on the primary */
432 if (s->associate == 0) {
433 strarray_free(s->exec);
434 }
435 s->exec = NULL;
436 }
437 }
438
service_add(const struct service * proto)439 static struct service *service_add(const struct service *proto)
440 {
441 struct service *s;
442
443 if (nservices == allocservices) {
444 if (allocservices > SERVICE_MAX - 5)
445 fatal("out of service structures, please restart", EX_UNAVAILABLE);
446 Services = xrealloc(Services,
447 (allocservices+=5) * sizeof(struct service));
448 }
449 s = &Services[nservices++];
450
451 if (proto)
452 memcpy(s, proto, sizeof(struct service));
453 else {
454 memset(s, 0, sizeof(struct service));
455 s->socket = -1;
456 s->stat[0] = -1;
457 s->stat[1] = -1;
458 }
459
460 return s;
461 }
462
/*
 * Open the listening socket(s) for service 's'.
 *
 * Nothing is done for a service that is already activated
 * (s->associate > 0) or that has no 'listen' setting (a daemon).
 *
 * A 'listen' value starting with '/' names a unix socket; anything else
 * is parsed by parse_inet_listen() and resolved with getaddrinfo()
 * according to s->proto (tcp, tcp4, tcp6, udp, udp4, udp6).  For every
 * resulting address a socket is created, configured, bound and, for tcp,
 * put into listening state, and a status pipe is created.  The first
 * address uses 's' itself; while s->socket is already open, further
 * addresses work on a local copy that is appended to Services[] through
 * service_add().
 *
 * is_startup - when non-zero and IMAPOPT_MASTER_BIND_ERRORS_FATAL is
 *              set, socket/bind/listen failures are fatal; otherwise they
 *              are logged and the address is skipped.
 *
 * If the configuration is invalid or no socket could be opened, the
 * service's command line is dropped via service_forget_exec().
 */
static void service_create(struct service *s, int is_startup)
{
    struct service service0, service;
    struct addrinfo hints, *res0, *res;
    int error, nsocket = 0;
    struct sockaddr_un sunsock;
    mode_t oldumask;
    int on = 1;
    int res0_is_local = 0;
    int r;

    if (s->associate > 0)
        return;                 /* service is already activated */

    if (!s->listen)
        return;                 /* service is a daemon, no listener */

    if (!s->name)
        fatal("Serious software bug found: service_create() called on unnamed service!",
              EX_SOFTWARE);

    if (s->listen[0] == '/') { /* unix socket */
        if (strlen(s->listen) >= sizeof(sunsock.sun_path)) {
            syslog(LOG_ERR, "invalid listen '%s' (too long), disabling %s",
                   s->listen, s->name);
            service_forget_exec(s);
            return;
        }
        /* hand-built single-entry result list; released with free() below */
        res0_is_local = 1;
        res0 = (struct addrinfo *)xzmalloc(sizeof(struct addrinfo));
        res0->ai_flags = AI_PASSIVE;
        res0->ai_family = PF_UNIX;
        if(!strcmp(s->proto, "tcp")) {
            res0->ai_socktype = SOCK_STREAM;
        } else {
            /* udp */
            res0->ai_socktype = SOCK_DGRAM;
        }
        res0->ai_addr = (struct sockaddr *)&sunsock;
        res0->ai_addrlen = sizeof(sunsock.sun_family) + strlen(s->listen) + 1;
#ifdef SIN6_LEN
        res0->ai_addrlen += sizeof(sunsock.sun_len);
        sunsock.sun_len = res0->ai_addrlen;
#endif
        sunsock.sun_family = AF_UNIX;

        /* reuse the function-level 'r' instead of shadowing it */
        r = snprintf(sunsock.sun_path, sizeof(sunsock.sun_path), "%s", s->listen);
        if (r < 0 || (size_t) r >= sizeof(sunsock.sun_path)) {
            /* belt and suspenders */
            fatal("Serious software bug found: "
                  "over-long listen path not detected earlier!",
                  EX_SOFTWARE);
        }
        /* remove any stale socket file so that bind() can create it */
        unlink(s->listen);
    } else { /* inet socket */
        char *port;
        char *listen_addr;

        memset(&hints, 0, sizeof(hints));
        hints.ai_flags = AI_PASSIVE;
        if (!strcmp(s->proto, "tcp")) {
            hints.ai_family = PF_UNSPEC;
            hints.ai_socktype = SOCK_STREAM;
        } else if (!strcmp(s->proto, "tcp4")) {
            hints.ai_family = PF_INET;
            hints.ai_socktype = SOCK_STREAM;
#ifdef PF_INET6
        } else if (!strcmp(s->proto, "tcp6")) {
            hints.ai_family = PF_INET6;
            hints.ai_socktype = SOCK_STREAM;
#endif
        } else if (!strcmp(s->proto, "udp")) {
            hints.ai_family = PF_UNSPEC;
            hints.ai_socktype = SOCK_DGRAM;
        } else if (!strcmp(s->proto, "udp4")) {
            hints.ai_family = PF_INET;
            hints.ai_socktype = SOCK_DGRAM;
#ifdef PF_INET6
        } else if (!strcmp(s->proto, "udp6")) {
            hints.ai_family = PF_INET6;
            hints.ai_socktype = SOCK_DGRAM;
#endif
        } else {
            syslog(LOG_INFO, "invalid proto '%s', disabling %s",
                   s->proto, s->name);
            service_forget_exec(s);
            return;
        }

        if (parse_inet_listen(s->listen, &listen_addr, &port) < 0) {
            syslog(LOG_ERR, "invalid listen '%s', disabling %s",
                   s->listen, s->name);
            service_forget_exec(s);
            return;
        }

        error = getaddrinfo(listen_addr, port, &hints, &res0);

        free(listen_addr);
        free(port);

        if (error) {
            syslog(LOG_INFO, "%s, disabling %s", gai_strerror(error), s->name);
            service_forget_exec(s);
            return;
        }
    }

    /* pristine template for the copies made for additional addresses */
    memcpy(&service0, s, sizeof(struct service));

    for (res = res0; res; res = res->ai_next) {
        if (s->socket >= 0) {
            /* 's' already holds an open socket: work on a fresh copy */
            memcpy(&service, &service0, sizeof(struct service));
            s = &service;
        }

        s->family = res->ai_family;
        switch (s->family) {
        case AF_UNIX:   s->familyname = "unix"; break;
        case AF_INET:   s->familyname = "ipv4"; break;
        case AF_INET6:  s->familyname = "ipv6"; break;
        default:        s->familyname = "unknown"; break;
        }

        if (verbose > 2) {
            syslog(LOG_DEBUG, "activating service %s/%s",
                   s->name, s->familyname);
        }

        s->socket = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
        if (s->socket < 0) {
            int e = errno;
            if (is_startup && config_getswitch(IMAPOPT_MASTER_BIND_ERRORS_FATAL)) {
                struct buf buf = BUF_INITIALIZER;
                buf_printf(&buf, "unable to open %s/%s socket: %s",
                           s->name, s->familyname, strerror(e));
                fatal(buf_cstring(&buf), EX_UNAVAILABLE);
            }

            /* make sure %m reports the socket() error */
            errno = e;
            syslog(LOG_ERR, "unable to open %s/%s socket: %m",
                   s->name, s->familyname);
            continue;
        }

        /* allow reuse of address */
        r = setsockopt(s->socket, SOL_SOCKET, SO_REUSEADDR,
                       (void *) &on, sizeof(on));
        if (r < 0) {
            syslog(LOG_ERR, "unable to setsocketopt(SO_REUSEADDR) service %s/%s: %m",
                   s->name, s->familyname);
        }
#if defined(IPV6_V6ONLY) && !(defined(__FreeBSD__) && __FreeBSD__ < 3)
        if (res->ai_family == AF_INET6) {
            r = setsockopt(s->socket, IPPROTO_IPV6, IPV6_V6ONLY,
                           (void *) &on, sizeof(on));
            if (r < 0) {
                syslog(LOG_ERR, "unable to setsocketopt(IPV6_V6ONLY) service %s/%s: %m",
                       s->name, s->familyname);
            }
        }
#endif

        /* set IP ToS if supported */
#if defined(SOL_IP) && defined(IP_TOS)
        if (s->family == AF_INET || s->family == AF_INET6) {
            r = setsockopt(s->socket, SOL_IP, IP_TOS,
                           (void *) &config_qosmarking,
                           sizeof(config_qosmarking));
            if (r < 0) {
                syslog(LOG_WARNING,
                       "unable to setsocketopt(IP_TOS) service %s/%s: %m",
                       s->name, s->familyname);
            }
        }
#endif

        oldumask = umask((mode_t) 0); /* for linux */
        r = cyrus_cap_bind(s->socket, res->ai_addr, res->ai_addrlen);
        umask(oldumask);
        if (r < 0) {
            int e = errno;
            if (is_startup && config_getswitch(IMAPOPT_MASTER_BIND_ERRORS_FATAL)) {
                struct buf buf = BUF_INITIALIZER;
                buf_printf(&buf, "unable to bind to %s/%s socket: %s",
                           s->name, s->familyname, strerror(e));
                fatal(buf_cstring(&buf), EX_UNAVAILABLE);
            }

            /* make sure %m reports the bind() error */
            errno = e;
            syslog(LOG_ERR, "unable to bind to %s/%s socket: %m",
                   s->name, s->familyname);
            xclose(s->socket);
            continue;
        }

        if (s->listen[0] == '/') { /* unix socket */
            /* for DUX, where this isn't the default.
               (harmlessly fails on some systems) */
            chmod(s->listen, (mode_t) 0777);
        }

        if ((!strcmp(s->proto, "tcp") || !strcmp(s->proto, "tcp4")
             || !strcmp(s->proto, "tcp6"))
            && listen(s->socket, listen_queue_backlog) < 0) {
            int e = errno;
            if (is_startup && config_getswitch(IMAPOPT_MASTER_BIND_ERRORS_FATAL)) {
                struct buf buf = BUF_INITIALIZER;
                buf_printf(&buf, "unable to listen to %s/%s socket: %s",
                           s->name, s->familyname, strerror(e));
                fatal(buf_cstring(&buf), EX_UNAVAILABLE);
            }

            /* make sure %m reports the listen() error */
            errno = e;
            syslog(LOG_ERR, "unable to listen to %s/%s socket: %m",
                   s->name, s->familyname);
            xclose(s->socket);
            continue;
        }

        s->ready_workers = 0;
        s->associate = nsocket;

        get_statsock(s->stat);

        /* additional addresses live in the local copy: append it */
        if (s == &service)
            service_add(s);
        nsocket++;
    }
    if (res0) {
        if(res0_is_local)
            free(res0);
        else
            freeaddrinfo(res0);
    }
    if (nsocket <= 0) {
        syslog(LOG_ERR, "unable to create %s listener socket: %m", s->name);
        service_forget_exec(s);
        return;
    }
}
701
/*
 * Log how a child ended, based on its wait status.
 *
 * c      - the child's record, or NULL if unknown (only used for the text)
 * pid    - the child's process id
 * status - status as filled in by waitpid()
 *
 * Returns 0 for a clean exit or an exit with EX_TEMPFAIL, 1 for any other
 * exit status or death by signal, and 2 for death by signal with a core
 * dump (only where WCOREDUMP is available).
 */
static int decode_wait_status(struct centry *c, pid_t pid, int status)
{
    char *who = centry_describe(c, pid);
    int failed = 0;

    if (WIFEXITED(status)) {
        if (WEXITSTATUS(status) == 0) {
            syslog(LOG_DEBUG, "%s exited normally", who);
        }
        else if (WEXITSTATUS(status) != EX_TEMPFAIL) {
            syslog(LOG_ERR, "%s exited, status %d",
                   who, WEXITSTATUS(status));
            failed = 1;
        }
        else {
            syslog(LOG_DEBUG, "%s was killed", who);
        }
    }

    if (WIFSIGNALED(status)) {
        const char *signame = strsignal(WTERMSIG(status));

        if (signame == NULL)
            signame = "unknown signal";
#ifdef WCOREDUMP
        syslog(LOG_ERR, "%s signaled to death by signal %d (%s%s)",
               who, WTERMSIG(status), signame,
               WCOREDUMP(status) ? ", core dumped" : "");
        if (WCOREDUMP(status))
            failed = 2;
        else
            failed = 1;
#else
        syslog(LOG_ERR, "%s signaled to death by %s %d",
               who, signame, WTERMSIG(status));
        failed = 1;
#endif
    }

    free(who);
    return failed;
}
739
run_startup(const char * name,const strarray_t * cmd)740 static void run_startup(const char *name, const strarray_t *cmd)
741 {
742 pid_t pid;
743 int status;
744 struct centry *c;
745 char path[PATH_MAX];
746
747 get_executable(path, sizeof(path), cmd);
748
749 switch (pid = fork()) {
750 case -1:
751 fatalf(1, "can't fork process to run startup: %m");
752 break;
753
754 case 0:
755 /* Child - Release our pidfile lock. */
756 xclose(pidfd);
757
758 set_caps(AFTER_FORK, /*is_master*/1);
759
760 child_sighandler_setup();
761
762 syslog(LOG_DEBUG, "about to exec %s", path);
763 execv(path, cmd->data);
764 fatalf(EX_OSERR, "can't exec %s for startup: %m", path);
765
766 default: /* parent */
767 if (waitpid(pid, &status, 0) < 0) {
768 syslog(LOG_ERR, "waitpid(): %m");
769 return;
770 }
771 c = centry_alloc();
772 centry_set_name(c, "START", name, path);
773 if (decode_wait_status(c, pid, status))
774 fatal("can't run startup", 1);
775 centry_free(c);
776 break;
777 }
778 }
779
/*
 * Clear 'flag' from the file descriptor flags (F_GETFD / F_SETFD) of 'fd'.
 *
 * The updated flags are written back to the same descriptor they were
 * read from.  A failure of either fcntl() call is logged and otherwise
 * ignored.
 */
static void fcntl_unset(int fd, int flag)
{
    int fdflags = fcntl(fd, F_GETFD, 0);

    if (fdflags != -1)
        fdflags = fcntl(fd, F_SETFD, fdflags & ~flag);

    if (fdflags == -1) {
        syslog(LOG_ERR, "fcntl(): unable to unset %d: %m", flag);
    }
}
789
service_is_fork_limited(struct service * s)790 static int service_is_fork_limited(struct service *s)
791 {
792 /* The longest period for which we will ignore the service */
793 #define FORKRATE_INTERVAL 0.4 /* seconds */
794 /* How much the forkrate estimator decays, as a proportion, per second */
795 #define FORKRATE_ALPHA 0.5 /* per second */
796 struct timeval now;
797 double interval;
798
799 if (!s->maxforkrate)
800 return 0;
801
802 gettimeofday(&now, 0);
803 interval = timesub(&s->last_interval_start, &now);
804 /* update our fork rate */
805 if (interval > 0.0) {
806 double f = pow(FORKRATE_ALPHA, interval);
807 s->forkrate = f * s->forkrate +
808 (1.0-f) * (s->interval_forks/interval);
809 s->interval_forks = 0;
810 s->last_interval_start = now;
811 }
812 else if (interval < 0.0) {
813 /*
814 * NTP or similar moved the time-of-day clock backwards more
815 * than the interval we asked to be delayed for. Given that, we
816 * have no basis for updating forkrate and must reset our rate
817 * estimating state. Let's just hope this is a rare event.
818 */
819 s->interval_forks = 0;
820 s->last_interval_start = now;
821 syslog(LOG_WARNING, "time of day clock went backwards");
822 }
823
824 /* If we've been busy lately, we will refuse to fork! */
825 /* (We schedule a wakeup call for sometime soon though to be
826 * sure that we don't wait to do the fork that is required forever! */
827 if ((unsigned int)s->forkrate >= s->maxforkrate) {
828 struct event *evt = (struct event *) xzmalloc(sizeof(struct event));
829
830 evt->name = xstrdup("forkrate wakeup call");
831 evt->mark = now;
832 timeval_add_double(&evt->mark, FORKRATE_INTERVAL);
833
834 schedule_event(evt);
835
836 return 1;
837 }
838 return 0;
839 }
840
spawn_service(int si)841 static void spawn_service(int si)
842 {
843 pid_t p;
844 int i;
845 char path[PATH_MAX];
846 static char name_env[100], name_env2[100], name_env3[100];
847 struct centry *c;
848 struct service *s = &Services[si];
849
850 if (!s->name) {
851 fatal("Serious software bug found: spawn_service() called on unnamed service!",
852 EX_SOFTWARE);
853 }
854
855 if (service_is_fork_limited(s))
856 return;
857
858 get_executable(path, sizeof(path), s->exec);
859
860 switch (p = fork()) {
861 case -1:
862 syslog(LOG_ERR, "can't fork process to run service %s/%s: %m",
863 s->name, s->familyname);
864 break;
865
866 case 0:
867 if (verbose > 2) {
868 syslog(LOG_DEBUG, "forked process to run service %s/%s",
869 s->name, s->familyname);
870 }
871
872 /* Child - Release our pidfile lock. */
873 xclose(pidfd);
874
875 set_caps(AFTER_FORK, /*is_master*/1);
876
877 child_sighandler_setup();
878
879 if (s->listen) {
880 if (dup2(s->stat[1], STATUS_FD) < 0) {
881 syslog(LOG_ERR, "can't duplicate status fd: %m");
882 exit(1);
883 }
884 if (dup2(s->socket, LISTEN_FD) < 0) {
885 syslog(LOG_ERR, "can't duplicate listener fd: %m");
886 exit(1);
887 }
888
889 fcntl_unset(STATUS_FD, FD_CLOEXEC);
890 fcntl_unset(LISTEN_FD, FD_CLOEXEC);
891 }
892 else {
893 snprintf(name_env3, sizeof(name_env3), "CYRUS_ISDAEMON=1");
894 putenv(name_env3);
895 }
896 #ifdef HAVE_SETRLIMIT
897 if (s->maxfds) limit_fds(s->maxfds);
898 #endif
899
900 /* close all listeners */
901 for (i = 0; i < nservices; i++) {
902 xclose(Services[i].socket);
903 xclose(Services[i].stat[0]);
904 xclose(Services[i].stat[1]);
905 }
906
907 syslog(LOG_DEBUG, "about to exec %s", path);
908
909 /* add service name to environment */
910 snprintf(name_env, sizeof(name_env), "CYRUS_SERVICE=%s", s->name);
911 putenv(name_env);
912 snprintf(name_env2, sizeof(name_env2), "CYRUS_ID=%d", s->associate);
913 putenv(name_env2);
914
915 execv(path, s->exec->data);
916 syslog(LOG_ERR, "couldn't exec %s: %m", path);
917 exit(EX_OSERR);
918
919 default: /* parent */
920 s->ready_workers++;
921 s->interval_forks++;
922 s->nforks++;
923 s->nactive++;
924
925 /* add to child table */
926 c = centry_alloc();
927 centry_set_name(c, s->listen ? "SERVICE" : "DAEMON", s->name, path);
928 c->si = si;
929 centry_set_state(c, SERVICE_STATE_READY);
930 centry_add(c, p);
931 break;
932 }
933
934 }
935
schedule_event(struct event * a)936 static void schedule_event(struct event *a)
937 {
938 struct event *ptr;
939
940 if (! a->name)
941 fatal("Serious software bug found: schedule_event() called on unnamed event!",
942 EX_SOFTWARE);
943
944 if (!schedule || timesub(&schedule->mark, &a->mark) < 0.0) {
945 a->next = schedule;
946 schedule = a;
947
948 return;
949 }
950 for (ptr = schedule;
951 ptr->next && timesub(&a->mark, &ptr->next->mark) <= 0.0;
952 ptr = ptr->next) ;
953
954 /* insert a */
955 a->next = ptr->next;
956 ptr->next = a;
957 }
958
/*
 * Run every scheduled event that is due at 'now'.
 *
 * Events are taken off the head of the global schedule for as long as
 * timesub(&now, &mark) <= 0.0 holds and collected on a local list.  Each
 * collected event that has a command is forked and exec'd without waiting
 * for it, and the child is recorded in the child table.  Afterwards an
 * event with a non-zero period is given a new mark and put back on the
 * schedule; an event with period 0 is freed.
 */
static void spawn_schedule(struct timeval now)
{
    struct event *a, *b;
    int i;
    char path[PATH_MAX];
    pid_t p;
    struct centry *c;

    a = NULL;
    /* update schedule accordingly */
    while (schedule && timesub(&now, &schedule->mark) <= 0.0) {
        /* delete from schedule, insert into a */
        struct event *ptr = schedule;

        /* delete */
        schedule = schedule->next;

        /* insert (at the front, so the local list is in reverse order) */
        ptr->next = a;
        a = ptr;
    }

    /* run all events */
    while (a && a != schedule) {
        /* if a->exec is NULL, we just used the event to wake up,
         * so we actually don't need to exec anything at the moment */
        if(a->exec) {
            get_executable(path, sizeof(path), a->exec);
            switch (p = fork()) {
            case -1:
                /* NOTE(review): unlike the other fork failure messages in
                 * this file, this one does not include %m */
                syslog(LOG_CRIT,
                       "can't fork process to run event %s", a->name);
                break;

            case 0:
                /* Child - Release our pidfile lock. */
                xclose(pidfd);

                set_caps(AFTER_FORK, /*is_master*/1);

                /* NOTE(review): run_startup() and spawn_service() call
                 * child_sighandler_setup() at this point; this child does
                 * not - confirm whether that is intended */

                /* close all listeners */
                for (i = 0; i < nservices; i++) {
                    xclose(Services[i].socket);
                    xclose(Services[i].stat[0]);
                    xclose(Services[i].stat[1]);
                }

                syslog(LOG_DEBUG, "about to exec %s", path);
                execv(path, a->exec->data);
                syslog(LOG_ERR, "can't exec %s on schedule: %m", path);
                exit(EX_OSERR);
                break;

            default:
                /* we don't wait for it to complete */

                /* add to child table */
                c = centry_alloc();
                centry_set_name(c, "EVENT", a->name, path);
                centry_set_state(c, SERVICE_STATE_READY);
                centry_add(c, p);
                break;
            }
        } /* a->exec */

        /* reschedule as needed */
        /* remember the successor first: schedule_event() rewrites a->next
         * and event_free() releases 'a' */
        b = a->next;
        if (a->period) {
            if(a->periodic) {
                /* next run is one period after this run */
                a->mark = now;
                a->mark.tv_sec += a->period;
            } else {
                struct tm *tm;
                int delta;
                /* Daily Event */
                /* step the mark forward a period at a time until the
                 * loop condition no longer holds for 'now' */
                while (timesub(&now, &a->mark) <= 0.0)
                    a->mark.tv_sec += a->period;
                /* check for daylight savings fuzz... */
                /* NOTE(review): the localtime() result is used without a
                 * NULL check */
                tm = localtime(&a->mark.tv_sec);
                if (tm->tm_hour != a->hour || tm->tm_min != a->min) {
                    /* calculate the same time on the new day */
                    tm->tm_hour = a->hour;
                    tm->tm_min = a->min;
                    delta = mktime(tm) - a->mark.tv_sec;
                    /* bring it within half a period either way */
                    while (delta > (a->period/2)) delta -= a->period;
                    while (delta < -(a->period/2)) delta += a->period;
                    /* update the time */
                    a->mark.tv_sec += delta;
                    /* and let us know about the change */
                    syslog(LOG_NOTICE, "timezone shift for %s - altering schedule by %d seconds", a->name, delta);
                }
            }
            /* reschedule a */
            schedule_event(a);
        } else {
            /* one-shot event: nothing more to do with it */
            event_free(a);
        }
        /* examine next event */
        a = b;
    }
}
1061
/*
 * Collect every child that has terminated, without blocking.
 *
 * For each reaped pid the wait status is logged via decode_wait_status().
 * If the pid is in the child table, the counters of its service (if it
 * still has one) are adjusted according to the state the child was in,
 * and the entry is marked SERVICE_STATE_DEAD.  A service whose READY
 * children keep dying abnormally is disabled: its command line is
 * dropped and its listening socket closed.
 */
static void reap_child(void)
{
    int status;
    pid_t pid;
    struct centry *c;
    struct service *s;
    int failed;

    /* WNOHANG: loop until no further terminated child is waiting */
    while ((pid = waitpid((pid_t) -1, &status, WNOHANG)) > 0) {

        /* account for the child */
        c = centry_find(pid);

        /* non-zero when the child exited abnormally; c may be NULL here */
        failed = decode_wait_status(c, pid, status);

        if (c) {
            /* s stays NULL for children not tied to a Services[] slot */
            s = ((c->si) != SERVICE_NONE) ? &Services[c->si] : NULL;

            /* paranoia */
            switch (c->service_state) {
            case SERVICE_STATE_READY:
            case SERVICE_STATE_BUSY:
            case SERVICE_STATE_UNKNOWN:
            case SERVICE_STATE_DEAD:
                break;
            default:
                syslog(LOG_CRIT,
                       "service %s/%s pid %d in ILLEGAL STATE: exited. Serious "
                       "software bug or memory corruption detected!",
                       s ? SERVICEPARAM(s->name) : "unknown",
                       s ? SERVICEPARAM(s->familyname) : "unknown", pid);
                /* fall back to UNKNOWN so the handling below applies */
                centry_set_state(c, SERVICE_STATE_UNKNOWN);
            }
            if (s) {
                /* update counters for known services */
                switch (c->service_state) {
                case SERVICE_STATE_READY:
                    s->nactive--;
                    s->ready_workers--;
                    if (!in_shutdown && failed) {
                        time_t now = time(NULL);

                        syslog(LOG_WARNING,
                               "service %s/%s pid %d in READY state: "
                               "terminated abnormally",
                               SERVICEPARAM(s->name),
                               SERVICEPARAM(s->familyname), pid);
                        /* failures further apart than the interval start
                         * a new count */
                        if (now - s->lastreadyfail > MAX_READY_FAIL_INTERVAL) {
                            s->nreadyfails = 0;
                        }
                        s->lastreadyfail = now;
                        if (++s->nreadyfails >= MAX_READY_FAILS && s->exec) {
                            syslog(LOG_ERR, "too many failures for "
                                   "service %s/%s, disabling until next SIGHUP",
                                   SERVICEPARAM(s->name),
                                   SERVICEPARAM(s->familyname));
                            service_forget_exec(s);
                            xclose(s->socket);
                        }
                    }
                    break;

                case SERVICE_STATE_DEAD:
                    /* uh? either we got duplicate signals, or we are now MT */
                    syslog(LOG_WARNING,
                           "service %s/%s pid %d in DEAD state: "
                           "receiving duplicate signals",
                           SERVICEPARAM(s->name),
                           SERVICEPARAM(s->familyname), pid);
                    break;

                case SERVICE_STATE_BUSY:
                    /* a busy child is not counted in ready_workers, so
                     * only nactive is adjusted */
                    s->nactive--;
                    if (!in_shutdown && failed) {
                        syslog(LOG_DEBUG,
                               "service %s/%s pid %d in BUSY state: "
                               "terminated abnormally",
                               SERVICEPARAM(s->name),
                               SERVICEPARAM(s->familyname), pid);
                    }
                    break;

                case SERVICE_STATE_UNKNOWN:
                    s->nactive--;
                    syslog(LOG_WARNING,
                           "service %s/%s pid %d in UNKNOWN state: exited",
                           SERVICEPARAM(s->name),
                           SERVICEPARAM(s->familyname), pid);
                    break;
                default:
                    /* Shouldn't get here */
                    break;
                }
            } else {
                /* children from spawn_schedule (events) or
                 * children of services removed by reread_conf() */
                if (c->service_state != SERVICE_STATE_READY) {
                    syslog(LOG_WARNING,
                           "unknown service pid %d in state %d: exited "
                           "(maybe using a service as an event, "
                           "or a service was removed by SIGHUP?)",
                           pid, c->service_state);
                }
            }
            /* the entry is kept; marking it DEAD sets its cleanup deadline */
            centry_set_state(c, SERVICE_STATE_DEAD);
        } else {
            /* Are we multithreaded now? we don't know this child */
            syslog(LOG_ERR,
                   "received SIGCHLD from unknown child pid %d, ignoring",
                   pid);
            /* FIXME: is this something we should take lightly? */
        }
        if (verbose && c && (c->si != SERVICE_NONE))
            syslog(LOG_DEBUG, "service %s/%s now has %d ready workers",
                   SERVICEPARAM(Services[c->si].name),
                   SERVICEPARAM(Services[c->si].familyname),
                   Services[c->si].ready_workers);
    }
}
1181
init_janitor(struct timeval now)1182 static void init_janitor(struct timeval now)
1183 {
1184 struct event *evt = (struct event *) xzmalloc(sizeof(struct event));
1185
1186 janitor_mark = now;
1187 janitor_position = 0;
1188
1189 evt->name = xstrdup("janitor periodic wakeup call");
1190 evt->period = 10;
1191 evt->periodic = 1;
1192 evt->mark = janitor_mark;
1193 schedule_event(evt);
1194 }
1195
child_janitor(struct timeval now)1196 static void child_janitor(struct timeval now)
1197 {
1198 int i;
1199 struct centry **p;
1200 struct centry *c;
1201
1202 /* Estimate the number of entries to clean up in this sweep */
1203 if (now.tv_sec > janitor_mark.tv_sec + 1) {
1204 /* overflow protection */
1205 i = child_table_size;
1206 } else {
1207 double n;
1208
1209 n = child_table_size * janitor_frequency * timesub(&janitor_mark, &now);
1210 if (n < child_table_size) {
1211 i = n;
1212 } else {
1213 i = child_table_size;
1214 }
1215 }
1216
1217 while (i-- > 0) {
1218 p = &ctable[janitor_position++];
1219 janitor_position = janitor_position % child_table_size;
1220 while (*p) {
1221 c = *p;
1222 if (c->service_state == SERVICE_STATE_DEAD) {
1223 if (c->janitor_deadline < now.tv_sec) {
1224 *p = c->next;
1225 centry_free(c);
1226 } else {
1227 p = &((*p)->next);
1228 }
1229 } else {
1230 time_t delay = (c->sighuptime != (time_t)-1) ?
1231 time(NULL) - c->sighuptime : 0;
1232
1233 if (delay >= 30) {
1234 /* client not yet logged out ? */
1235 struct service *s = ((c->si) != SERVICE_NONE) ?
1236 &Services[c->si] : NULL;
1237
1238 syslog(LOG_INFO, "service %s/%s pid %d in state %d has not "
1239 "yet been recycled since SIGHUP was sent (%ds ago)",
1240 s ? SERVICEPARAM(s->name) : "unknown",
1241 s ? SERVICEPARAM(s->familyname) : "unknown",
1242 c->pid, c->service_state, (int)delay);
1243
1244 /* no need to log it more than once */
1245 c->sighuptime = (time_t)-1;
1246 }
1247 p = &((*p)->next);
1248 }
1249 }
1250 }
1251 }
1252
/* Set once SIGQUIT, SIGTERM or SIGINT has been received, so that a clean
 * shutdown can be carried out outside of signal context. */
static volatile sig_atomic_t gotsigquit = 0;

/* Signal handler: only records that a shutdown signal arrived. */
static void sigquit_handler(int sig)
{
    (void) sig;         /* which signal it was does not matter */
    gotsigquit = 1;
}
1260
begin_shutdown(void)1261 static void begin_shutdown(void)
1262 {
1263 /* Set a flag so main loop knows to shut down when
1264 all children have exited. Note, we will be called
1265 twice as we send SIGTERM to our own process group. */
1266 if (in_shutdown)
1267 return;
1268 in_shutdown = 1;
1269 syslog(LOG_INFO, "attempting clean shutdown on signal");
1270
1271 /* send our process group a SIGTERM */
1272 if (kill(0, SIGTERM) < 0) {
1273 syslog(LOG_ERR, "begin_shutdown: kill(0, SIGTERM): %m");
1274 }
1275 }
1276
/* Set when SIGCHLD has been received; the handler does nothing else. */
static volatile sig_atomic_t gotsigchld = 0;

/* Signal handler: only records that a SIGCHLD arrived. */
static void sigchld_handler(int sig)
{
    (void) sig;         /* signal number is not needed */
    gotsigchld = 1;
}
1283
/* Set when SIGHUP has been received.
 *
 * Declared volatile sig_atomic_t (not plain int): that is the object type
 * the C standard guarantees may be written from an asynchronous signal
 * handler, and it matches the gotsigquit / gotsigchld flags in this file. */
static volatile sig_atomic_t gotsighup = 0;

/* Signal handler: only records that a SIGHUP arrived. */
static void sighup_handler(int sig __attribute__((unused)))
{
    gotsighup = 1;
}
1290
/* SIGALRM handler: intentionally does nothing. */
static void sigalrm_handler(int sig)
{
    (void) sig;
}
1295
/*
 * Install the master process's signal handlers.
 *
 * Unblocks SIGHUP, SIGALRM, SIGQUIT, SIGTERM, SIGINT and SIGCHLD, then
 * installs a handler for each of them.  Any sigaction() failure is fatal.
 * When built with HAVE_PSELECT, SIGCHLD/SIGQUIT/SIGINT/SIGTERM are blocked
 * again at the end and the signal mask in effect just before that is
 * saved in pselect_sigmask.
 */
static void sighandler_setup(void)
{
    struct sigaction action;
    sigset_t siglist;

    /* make sure none of the signals we are about to handle is blocked */
    memset(&siglist, 0, sizeof(siglist));
    sigemptyset(&siglist);
    sigaddset(&siglist, SIGHUP);
    sigaddset(&siglist, SIGALRM);
    sigaddset(&siglist, SIGQUIT);
    sigaddset(&siglist, SIGTERM);
    sigaddset(&siglist, SIGINT);
    sigaddset(&siglist, SIGCHLD);
    sigprocmask(SIG_UNBLOCK, &siglist, NULL);

    /* `action` is reused for every sigaction() call below; only the
     * handler (and, for SIGCHLD, one extra flag) changes in between */
    memset(&action, 0, sizeof(action));
    sigemptyset(&action.sa_mask);

    action.sa_handler = sighup_handler;
#ifdef SA_RESTART
    /* never cleared afterwards, so it applies to all handlers installed
     * by this function */
    action.sa_flags |= SA_RESTART;
#endif
    if (sigaction(SIGHUP, &action, NULL) < 0)
        fatalf(1, "unable to install signal handler for SIGHUP: %m");

    action.sa_handler = sigalrm_handler;
    if (sigaction(SIGALRM, &action, NULL) < 0)
        fatalf(1, "unable to install signal handler for SIGALRM: %m");

    /* Allow a clean shutdown on any of SIGQUIT, SIGINT or SIGTERM */
    action.sa_handler = sigquit_handler;
    if (sigaction(SIGQUIT, &action, NULL) < 0)
        fatalf(1, "unable to install signal handler for SIGQUIT: %m");
    if (sigaction(SIGTERM, &action, NULL) < 0)
        fatalf(1, "unable to install signal handler for SIGTERM: %m");
    if (sigaction(SIGINT, &action, NULL) < 0)
        fatalf(1, "unable to install signal handler for SIGINT: %m");

    /* SA_NOCLDSTOP is added last, so it only affects the SIGCHLD handler */
    action.sa_flags |= SA_NOCLDSTOP;
    action.sa_handler = sigchld_handler;
    if (sigaction(SIGCHLD, &action, NULL) < 0)
        fatalf(1, "unable to install signal handler for SIGCHLD: %m");

#if HAVE_PSELECT
    /* block SIGCHLD, and set up pselect_sigmask so SIGCHLD
     * will be unblocked again inside pselect().  Ditto SIGQUIT. */
    sigemptyset(&siglist);
    sigaddset(&siglist, SIGCHLD);
    sigaddset(&siglist, SIGQUIT);
    sigaddset(&siglist, SIGINT);
    sigaddset(&siglist, SIGTERM);
    /* the third argument receives the mask as it was before blocking */
    sigprocmask(SIG_BLOCK, &siglist, &pselect_sigmask);
#endif
}
1350
/*
 * Undo master's signal setup in a freshly forked child.
 *
 * Only does anything when built with HAVE_PSELECT.  SIGQUIT, SIGINT and
 * SIGTERM are put back to their default action: execv() would do that
 * anyway, but a signal arriving between fork() and execv() would
 * otherwise run master's handler, set gotsigquit, and then be ignored.
 * Finally the signal mask saved in pselect_sigmask is restored.  Any
 * sigaction() failure makes the child exit with EX_TEMPFAIL.
 */
static void child_sighandler_setup(void)
{
#if HAVE_PSELECT
    struct sigaction dfl;

    memset(&dfl, 0, sizeof(dfl));
    dfl.sa_handler = SIG_DFL;
    sigemptyset(&dfl.sa_mask);

    if (sigaction(SIGQUIT, &dfl, NULL) < 0) {
        syslog(LOG_ERR, "unable to remove signal handler for SIGQUIT: %m");
        exit(EX_TEMPFAIL);
    }

    if (sigaction(SIGINT, &dfl, NULL) < 0) {
        syslog(LOG_ERR, "unable to remove signal handler for SIGINT: %m");
        exit(EX_TEMPFAIL);
    }

    if (sigaction(SIGTERM, &dfl, NULL) < 0) {
        syslog(LOG_ERR, "unable to remove signal handler for SIGTERM: %m");
        exit(EX_TEMPFAIL);
    }

    /* Unblock SIGCHLD et al in the child */
    sigprocmask(SIG_SETMASK, &pselect_sigmask, NULL);
#endif
}
1383
1384 /*
1385 * Receives a message from a service.
1386 *
1387 * Returns zero if all goes well
1388 * 1 if no msg available
1389 * 2 if bad message received (incorrectly sized)
1390 * -1 on error (errno set)
1391 *
1392 * TODO: should use retry_read() which has almost the
1393 * exact same semantics apart from the return value.
1394 */
read_msg(int fd,struct notify_message * msg)1395 static int read_msg(int fd, struct notify_message *msg)
1396 {
1397 ssize_t r = 0;
1398 size_t off = 0;
1399 int s = sizeof(struct notify_message);
1400
1401 while (s > 0) {
1402 do
1403 r = read(fd, ((char *)msg) + off, s);
1404 while ((r == -1) && (errno == EINTR));
1405 if (r <= 0) break;
1406 s -= r;
1407 off += r;
1408 }
1409 if ( ((r == 0) && (off == 0)) ||
1410 ((r == -1) && (errno == EAGAIN)) )
1411 return 1;
1412 if (r == -1) return -1;
1413 if (s != 0) return 2;
1414 return 0;
1415 }
1416
/*
 * Apply one status message from a child to master's view of that child.
 *
 * si  - index into Services[] of the service the message arrived for
 * msg - the message; msg->service_pid identifies the child and
 *       msg->message is one of the MASTER_SERVICE_* codes
 *
 * The child's entry is looked up by pid (and re-created in DEAD state if
 * it is no longer known), re-associated with `si` if it was recorded
 * under a different service, and then moved through the
 * READY/BUSY/UNKNOWN/DEAD state machine, updating the service's
 * ready_workers and nconnections counters along the way.
 */
static void process_msg(int si, struct notify_message *msg)
{
    struct centry *c;
    /* si must NOT point to an invalid service */
    struct service *s = &Services[si];

    c = centry_find(msg->service_pid);

    /* Did we find it? */
    if (!c) {
        /* If we don't know about the child, that means it has expired from
         * the child list, due to large message delivery delays.  This is
         * indeed possible, although it is rare (Debian bug report).
         *
         * Note that this analysis depends on master's single-threaded
         * nature */
        syslog(LOG_WARNING,
               "service %s/%s pid %d: receiving messages from long dead children",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), msg->service_pid);
        /* re-add child to list */
        /* the placeholder entry is created directly in DEAD state */
        c = centry_alloc();
        centry_set_name(c, "ZOMBIE", NULL, NULL);
        c->si = si;
        centry_set_state(c, SERVICE_STATE_DEAD);
        centry_add(c, msg->service_pid);
    }

    /* paranoia */
    /* the channel the message arrived on wins over the recorded service */
    if (si != c->si) {
        syslog(LOG_ERR,
               "service %s/%s pid %d: changing from service %s/%s due to received message",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid,
               ((c->si != SERVICE_NONE) ? SERVICEPARAM(Services[c->si].name) : "unknown"),
               ((c->si != SERVICE_NONE) ? SERVICEPARAM(Services[c->si].familyname) : "unknown"));
        c->si = si;
    }
    /* sanity-check the current state before acting on the message */
    switch (c->service_state) {
    case SERVICE_STATE_UNKNOWN:
        syslog(LOG_WARNING,
               "service %s/%s pid %d in UNKNOWN state: processing message 0x%x",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid, msg->message);
        break;
    case SERVICE_STATE_READY:
    case SERVICE_STATE_BUSY:
    case SERVICE_STATE_DEAD:
        break;
    default:
        /* not a known state: log and continue from UNKNOWN */
        syslog(LOG_CRIT,
               "service %s/%s pid %d in ILLEGAL state: detected.  Serious software bug or memory corruption uncloaked while processing message 0x%x from child!",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid, msg->message);
        centry_set_state(c, SERVICE_STATE_UNKNOWN);
        break;
    }

    /* process message, according to state machine */
    switch (msg->message) {
    case MASTER_SERVICE_AVAILABLE:
        /* child says it is ready for work */
        switch (c->service_state) {
        case SERVICE_STATE_READY:
            /* duplicate message? */
            syslog(LOG_WARNING,
                   "service %s/%s pid %d in READY state: sent available message but it is already ready",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_UNKNOWN:
            /* since state is unknown, error in non-DoS way, i.e.
             * we don't increment ready_workers */
            syslog(LOG_DEBUG,
                   "service %s/%s pid %d in UNKNOWN state: now available and in READY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            centry_set_state(c, SERVICE_STATE_READY);
            break;

        case SERVICE_STATE_BUSY:
            /* the normal BUSY -> READY transition */
            if (verbose)
                syslog(LOG_DEBUG,
                       "service %s/%s pid %d in BUSY state: now available and in READY state",
                       SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            centry_set_state(c, SERVICE_STATE_READY);
            s->ready_workers++;
            break;

        case SERVICE_STATE_DEAD:
            /* echoes from the past... just ignore */
            break;

        default:
            /* Shouldn't get here */
            break;
        }
        break;

    case MASTER_SERVICE_UNAVAILABLE:
        /* child says it can no longer take work */
        switch (c->service_state) {
        case SERVICE_STATE_BUSY:
            /* duplicate message? */
            syslog(LOG_WARNING,
                   "service %s/%s pid %d in BUSY state: sent unavailable message but it is already busy",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_UNKNOWN:
            /* ready_workers is not touched: the child was never counted */
            syslog(LOG_DEBUG,
                   "service %s/%s pid %d in UNKNOWN state: now unavailable and in BUSY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            centry_set_state(c, SERVICE_STATE_BUSY);
            break;

        case SERVICE_STATE_READY:
            /* the normal READY -> BUSY transition */
            if (verbose)
                syslog(LOG_DEBUG,
                       "service %s/%s pid %d in READY state: now unavailable and in BUSY state",
                       SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            centry_set_state(c, SERVICE_STATE_BUSY);
            s->ready_workers--;
            break;

        case SERVICE_STATE_DEAD:
            /* echoes from the past... just ignore */
            break;

        default:
            /* Shouldn't get here */
            break;
        }
        break;

    case MASTER_SERVICE_CONNECTION:
        /* child reports a new connection; it is expected to be BUSY */
        switch (c->service_state) {
        case SERVICE_STATE_BUSY:
            s->nconnections++;
            if (verbose)
                syslog(LOG_DEBUG,
                       "service %s/%s pid %d in BUSY state: now serving connection",
                       SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_UNKNOWN:
            s->nconnections++;
            centry_set_state(c, SERVICE_STATE_BUSY);
            syslog(LOG_DEBUG,
                   "service %s/%s pid %d in UNKNOWN state: now in BUSY state and serving connection",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_READY:
            syslog(LOG_ERR,
                   "service %s/%s pid %d in READY state: reported new connection, forced to BUSY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            /* be resilient on face of a bogon source, so lets err to the side
             * of non-denial-of-service */
            centry_set_state(c, SERVICE_STATE_BUSY);
            s->nconnections++;
            s->ready_workers--;
            break;

        case SERVICE_STATE_DEAD:
            /* echoes from the past... do the accounting */
            s->nconnections++;
            break;

        default:
            /* Shouldn't get here */
            break;
        }
        break;

    case MASTER_SERVICE_CONNECTION_MULTI:
        /* child reports a new connection; it is expected to stay READY */
        switch (c->service_state) {
        case SERVICE_STATE_READY:
            s->nconnections++;
            if (verbose)
                syslog(LOG_DEBUG,
                       "service %s/%s pid %d in READY state: serving one more multi-threaded connection",
                       SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_BUSY:
            syslog(LOG_ERR,
                   "service %s/%s pid %d in BUSY state: serving one more multi-threaded connection, forced to READY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            /* be resilient on face of a bogon source, so lets err to the side
             * of non-denial-of-service */
            centry_set_state(c, SERVICE_STATE_READY);
            s->nconnections++;
            s->ready_workers++;
            break;

        case SERVICE_STATE_UNKNOWN:
            /* as with AVAILABLE from UNKNOWN, ready_workers is not bumped */
            s->nconnections++;
            centry_set_state(c, SERVICE_STATE_READY);
            syslog(LOG_ERR,
                   "service %s/%s pid %d in UNKNOWN state: serving one more multi-threaded connection, forced to READY state",
                   SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid);
            break;

        case SERVICE_STATE_DEAD:
            /* echoes from the past... do the accounting */
            s->nconnections++;
            break;

        default:
            /* Shouldn't get here */
            break;
        }
        break;

    default:
        syslog(LOG_CRIT, "service %s/%s pid %d: Software bug: unrecognized message 0x%x",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), c->pid, msg->message);
        break;
    }

    if (verbose)
        syslog(LOG_DEBUG, "service %s/%s now has %d ready workers",
               SERVICEPARAM(s->name), SERVICEPARAM(s->familyname), s->ready_workers);
}
1635
add_start(const char * name,struct entry * e,void * rock)1636 static void add_start(const char *name, struct entry *e,
1637 void *rock __attribute__((unused)))
1638 {
1639 const char *cmd = masterconf_getstring(e, "cmd", "");
1640 strarray_t *tok;
1641
1642 if (!strcmp(cmd,""))
1643 fatalf(EX_CONFIG, "unable to find command for %s", name);
1644
1645 tok = strarray_split(cmd, NULL, 0);
1646 run_startup(name, tok);
1647 strarray_free(tok);
1648 }
1649
add_daemon(const char * name,struct entry * e,void * rock)1650 static void add_daemon(const char *name, struct entry *e, void *rock)
1651 {
1652 int ignore_err = rock ? 1 : 0;
1653 char *cmd = xstrdup(masterconf_getstring(e, "cmd", ""));
1654 rlim_t maxfds = (rlim_t) masterconf_getint(e, "maxfds", 0);
1655 int maxforkrate = masterconf_getint(e, "maxforkrate", 0);
1656 int reconfig = 0;
1657 int i;
1658
1659 if (maxforkrate == 0) maxforkrate = 10; /* reasonable safety */
1660
1661 if (!strcmp(cmd, "")) {
1662 char buf[256];
1663 snprintf(buf, sizeof(buf),
1664 "unable to find command or port for service '%s'", name);
1665
1666 if (ignore_err) {
1667 syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1668 goto done;
1669 }
1670
1671 fatal(buf, EX_CONFIG);
1672 }
1673
1674 /* see if we have an existing entry that can be reused */
1675 for (i = 0; i < nservices; i++) {
1676 /* skip non-primary instances */
1677 if (Services[i].associate > 0)
1678 continue;
1679
1680 if (!strcmpsafe(Services[i].name, name) && Services[i].exec) {
1681 /* we have duplicate service names in the config file */
1682 char buf[256];
1683 snprintf(buf, sizeof(buf), "multiple entries for service '%s'", name);
1684
1685 if (ignore_err) {
1686 syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1687 goto done;
1688 }
1689
1690 fatal(buf, EX_CONFIG);
1691 }
1692
1693 /* must have empty/same service name, listen and proto */
1694 if (!Services[i].name || !strcmp(Services[i].name, name))
1695 break;
1696 }
1697
1698 if (i == nservices) {
1699 /* we don't have an existing one, so create a new service */
1700 struct service *s = service_add(NULL);
1701 gettimeofday(&s->last_interval_start, 0);
1702 }
1703 else reconfig = 1;
1704
1705 if (!Services[i].name) Services[i].name = xstrdup(name);
1706
1707 strarray_free(Services[i].exec);
1708 Services[i].exec = strarray_split(cmd, NULL, 0);
1709
1710 /* is this daemon actually there? */
1711 if (!verify_service_file(Services[i].exec)) {
1712 fatalf(EX_CONFIG,
1713 "cannot find executable for daemon '%s'", name);
1714 /* if it is not, we're misconfigured, die. */
1715 }
1716
1717 Services[i].maxforkrate = maxforkrate;
1718 Services[i].maxfds = maxfds;
1719 Services[i].babysit = 1;
1720 Services[i].max_workers = 1;
1721 Services[i].desired_workers = 1;
1722 Services[i].familyname = "daemon";
1723
1724 if (verbose > 2)
1725 syslog(LOG_DEBUG, "%s: daemon '%s' (%s, %d)",
1726 reconfig ? "reconfig" : "add",
1727 Services[i].name, cmd,
1728 (int) Services[i].maxfds);
1729
1730 done:
1731 free(cmd);
1732 return;
1733 }
1734
add_service(const char * name,struct entry * e,void * rock)1735 static void add_service(const char *name, struct entry *e, void *rock)
1736 {
1737 int ignore_err = rock ? 1 : 0;
1738 char *cmd = xstrdup(masterconf_getstring(e, "cmd", ""));
1739 int prefork = masterconf_getint(e, "prefork", 0);
1740 int babysit = masterconf_getswitch(e, "babysit", 0);
1741 int maxforkrate = masterconf_getint(e, "maxforkrate", 0);
1742 char *listen = xstrdup(masterconf_getstring(e, "listen", ""));
1743 char *proto = xstrdup(masterconf_getstring(e, "proto", "tcp"));
1744 char *max = xstrdup(masterconf_getstring(e, "maxchild", "-1"));
1745 rlim_t maxfds = (rlim_t) masterconf_getint(e, "maxfds", 0);
1746 int reconfig = 0;
1747 int i, j;
1748
1749 if(babysit && prefork == 0) prefork = 1;
1750 if(babysit && maxforkrate == 0) maxforkrate = 10; /* reasonable safety */
1751
1752 if (!strcmp(cmd,"") || !strcmp(listen,"")) {
1753 char buf[256];
1754 snprintf(buf, sizeof(buf),
1755 "unable to find command or port for service '%s'", name);
1756
1757 if (ignore_err) {
1758 syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1759 goto done;
1760 }
1761
1762 fatal(buf, EX_CONFIG);
1763 }
1764
1765 /* see if we have an existing entry that can be reused */
1766 for (i = 0; i < nservices; i++) {
1767 /* skip non-primary instances */
1768 if (Services[i].associate > 0)
1769 continue;
1770
1771 if (!strcmpsafe(Services[i].name, name) && Services[i].exec) {
1772 /* we have duplicate service names in the config file */
1773 char buf[256];
1774 snprintf(buf, sizeof(buf), "multiple entries for service '%s'", name);
1775
1776 if (ignore_err) {
1777 syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1778 goto done;
1779 }
1780
1781 fatal(buf, EX_CONFIG);
1782 }
1783
1784 /* must have empty/same service name, listen and proto */
1785 if ((!Services[i].name || !strcmp(Services[i].name, name)) &&
1786 (!Services[i].listen || !strcmp(Services[i].listen, listen)) &&
1787 (!Services[i].proto || !strcmp(Services[i].proto, proto)))
1788 break;
1789 }
1790
1791 if (i == nservices) {
1792 /* either we don't have an existing entry or we are changing
1793 * the port parameters, so create a new service
1794 */
1795 struct service *s = service_add(NULL);
1796 gettimeofday(&s->last_interval_start, 0);
1797 }
1798 else if (Services[i].listen) reconfig = 1;
1799
1800 if (!Services[i].name) Services[i].name = xstrdup(name);
1801 if (Services[i].listen) free(Services[i].listen);
1802 Services[i].listen = listen;
1803 listen = NULL; /* avoid freeing it */
1804 if (Services[i].proto) free(Services[i].proto);
1805 Services[i].proto = proto;
1806 proto = NULL; /* avoid freeing it */
1807
1808 strarray_free(Services[i].exec);
1809 Services[i].exec = strarray_split(cmd, NULL, 0);
1810
1811 /* is this service actually there? */
1812 if (!verify_service_file(Services[i].exec)) {
1813 fatalf(EX_CONFIG,
1814 "cannot find executable for service '%s'", name);
1815 /* if it is not, we're misconfigured, die. */
1816 }
1817
1818 Services[i].maxforkrate = maxforkrate;
1819 Services[i].maxfds = maxfds;
1820
1821 if (!strcmp(Services[i].proto, "tcp") ||
1822 !strcmp(Services[i].proto, "tcp4") ||
1823 !strcmp(Services[i].proto, "tcp6")) {
1824 Services[i].desired_workers = prefork;
1825 Services[i].babysit = babysit;
1826 Services[i].max_workers = atoi(max);
1827 if (Services[i].max_workers < 0) {
1828 Services[i].max_workers = INT_MAX;
1829 }
1830 } else {
1831 /* udp */
1832 if (prefork > 1) prefork = 1;
1833 Services[i].desired_workers = prefork;
1834 Services[i].max_workers = 1;
1835 }
1836
1837 if (reconfig) {
1838 /* reconfiguring an existing service, update any other instances */
1839 for (j = 0; j < nservices; j++) {
1840 if (Services[j].associate > 0 && Services[j].listen &&
1841 Services[j].name && !strcmp(Services[j].name, name)) {
1842 Services[j].maxforkrate = Services[i].maxforkrate;
1843 Services[j].exec = Services[i].exec;
1844 Services[j].desired_workers = Services[i].desired_workers;
1845 Services[j].babysit = Services[i].babysit;
1846 Services[j].max_workers = Services[i].max_workers;
1847 }
1848 }
1849 }
1850
1851 if (verbose > 2)
1852 syslog(LOG_DEBUG, "%s: service '%s' (%s, %s:%s, %d, %d, %d)",
1853 reconfig ? "reconfig" : "add",
1854 Services[i].name, cmd,
1855 Services[i].proto, Services[i].listen,
1856 Services[i].desired_workers,
1857 Services[i].max_workers,
1858 (int) Services[i].maxfds);
1859
1860 done:
1861 free(cmd);
1862 free(listen);
1863 free(proto);
1864 free(max);
1865 return;
1866 }
1867
add_event(const char * name,struct entry * e,void * rock)1868 static void add_event(const char *name, struct entry *e, void *rock)
1869 {
1870 int ignore_err = rock ? 1 : 0;
1871 /* Note: masterconf_getstring() shares a static buffer with
1872 * masterconf_getint() so we *must* strdup here */
1873 char *cmd = xstrdup(masterconf_getstring(e, "cmd", ""));
1874 int period = 60 * masterconf_getint(e, "period", 0);
1875 int at = masterconf_getint(e, "at", -1), hour, min;
1876 struct timeval now;
1877 struct event *evt;
1878
1879 gettimeofday(&now, 0);
1880
1881 if (!strcmp(cmd,"")) {
1882 char buf[256];
1883 snprintf(buf, sizeof(buf),
1884 "unable to find command or port for event '%s'", name);
1885
1886 if (ignore_err) {
1887 syslog(LOG_WARNING, "WARNING: %s -- ignored", buf);
1888 free(cmd);
1889 return;
1890 }
1891
1892 fatal(buf, EX_CONFIG);
1893 }
1894
1895 evt = (struct event *) xzmalloc(sizeof(struct event));
1896 evt->name = xstrdup(name);
1897
1898 if (at >= 0 && ((hour = at / 100) <= 23) && ((min = at % 100) <= 59)) {
1899 struct tm *tm = localtime(&now.tv_sec);
1900
1901 period = 86400; /* 24 hours */
1902 evt->periodic = 0;
1903 evt->hour = hour;
1904 evt->min = min;
1905 tm->tm_hour = hour;
1906 tm->tm_min = min;
1907 tm->tm_sec = 0;
1908 evt->mark.tv_sec = mktime(tm);
1909 evt->mark.tv_usec = 0;
1910 if (timesub(&now, &evt->mark) < 0.0) {
1911 /* already missed it, so schedule for next day */
1912 evt->mark.tv_sec += period;
1913 }
1914 }
1915 else {
1916 evt->periodic = 1;
1917 evt->mark = now;
1918 }
1919 evt->period = period;
1920
1921 evt->exec = strarray_splitm(cmd, NULL, 0);
1922
1923 schedule_event(evt);
1924 }
1925
1926 #ifdef HAVE_SETRLIMIT
1927
1928 #ifdef RLIMIT_NOFILE
1929 # define RLIMIT_NUMFDS RLIMIT_NOFILE
1930 #else
1931 # ifdef RLIMIT_OFILE
1932 # define RLIMIT_NUMFDS RLIMIT_OFILE
1933 # endif
1934 #endif
limit_fds(rlim_t x)1935 static void limit_fds(rlim_t x)
1936 {
1937 struct rlimit rl;
1938
1939 #ifdef HAVE_GETRLIMIT
1940 if (!getrlimit(RLIMIT_NUMFDS, &rl)) {
1941 if (x != RLIM_INFINITY && rl.rlim_max != RLIM_INFINITY && x > rl.rlim_max) {
1942 syslog(LOG_WARNING,
1943 "limit_fds: requested %" PRIu64 ", but capped to %" PRIu64,
1944 (uint64_t) x, (uint64_t) rl.rlim_max);
1945 }
1946 rl.rlim_cur = (x == RLIM_INFINITY || x > rl.rlim_max) ? rl.rlim_max : x;
1947 }
1948 else
1949 #endif /* HAVE_GETRLIMIT */
1950 {
1951 rl.rlim_cur = rl.rlim_max = x;
1952 }
1953
1954 if (verbose > 1) {
1955 syslog(LOG_DEBUG, "set maximum file descriptors to " RLIM_T_FMT "/" RLIM_T_FMT,
1956 rl.rlim_cur, rl.rlim_max);
1957 }
1958
1959 if (setrlimit(RLIMIT_NUMFDS, &rl) < 0) {
1960 syslog(LOG_ERR,
1961 "setrlimit: Unable to set file descriptors limit to " RLIM_T_FMT ": %m",
1962 rl.rlim_cur);
1963 }
1964 }
1965 #endif /* HAVE_SETRLIMIT */
1966
1967 /* minimal-dependency prometheus text report */
init_prom_report(struct timeval now)1968 static void init_prom_report(struct timeval now)
1969 {
1970 struct buf buf = BUF_INITIALIZER;
1971 struct event *evt;
1972 const char *tmp;
1973
1974 prom_enabled = config_getswitch(IMAPOPT_PROMETHEUS_ENABLED);
1975 prom_frequency = config_getduration(IMAPOPT_PROMETHEUS_UPDATE_FREQ, 's');
1976
1977 if (prom_frequency < 1) prom_enabled = 0;
1978 if (!prom_enabled) return;
1979
1980 prom_prev_report.tv_sec = now.tv_sec - prom_frequency; /* next report asap */
1981 prom_prev_report.tv_usec = 0;
1982
1983 if ((tmp = config_getstring(IMAPOPT_PROMETHEUS_STATS_DIR))) {
1984 if (tmp[0] == '/' && tmp[1] != '\0') {
1985 buf_setcstr(&buf, tmp);
1986 if (buf.s[buf.len-1] != '/')
1987 buf_putc(&buf, '/');
1988 buf_appendcstr(&buf, FNAME_PROM_MASTER_REPORT);
1989 }
1990 }
1991 else if ((tmp = config_getstring(IMAPOPT_CONFIGDIRECTORY))) {
1992 buf_setcstr(&buf, tmp);
1993 buf_appendcstr(&buf, FNAME_PROM_STATS_DIR);
1994 buf_putc(&buf, '/');
1995 buf_appendcstr(&buf, FNAME_PROM_MASTER_REPORT);
1996 }
1997
1998 if (!buf_len(&buf)) {
1999 syslog(LOG_NOTICE, "couldn't find somewhere to write prometheus report to"
2000 " - disabling master prometheus report until next reload");
2001 prom_enabled = 0;
2002 buf_free(&buf);
2003 return;
2004 }
2005
2006 if (prom_report_fname) free(prom_report_fname);
2007 prom_report_fname = buf_release(&buf);
2008 cyrus_mkdir(prom_report_fname, 0755);
2009
2010 evt = xzmalloc(sizeof(*evt));
2011 evt->name = xstrdup("master prometheus report periodic wakeup call");
2012 evt->period = prom_frequency;
2013 evt->periodic = 1;
2014 evt->mark = now;
2015 schedule_event(evt);
2016
2017 syslog(LOG_DEBUG, "updating %s every %d seconds", prom_report_fname, prom_frequency);
2018 }
2019
do_prom_report(struct timeval now)2020 static void do_prom_report(struct timeval now)
2021 {
2022 struct buf report = BUF_INITIALIZER;
2023 int fd, i, r;
2024 int64_t last_updated;
2025
2026 if (!prom_enabled || timesub(&prom_prev_report, &now) + 0.5 < prom_frequency)
2027 return;
2028
2029 /* open and grab the lock -- but if we would block, just skip this time */
2030 fd = open(prom_report_fname, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
2031 if (fd == -1) {
2032 syslog(LOG_ERR, "open(%s): %m - %s",
2033 prom_report_fname,
2034 "disabling master prometheus report until next reload");
2035 prom_enabled = 0;
2036 return;
2037 }
2038 r = lock_setlock(fd, /*ex*/ 1, /*nb*/ 1, prom_report_fname);
2039 if (r == -1) {
2040 if (errno != EWOULDBLOCK) {
2041 syslog(LOG_ERR, "lock_setlock(%s): %m - %s",
2042 prom_report_fname,
2043 "disabling master prometheus report until next reload");
2044 prom_enabled = 0;
2045 }
2046 return;
2047 }
2048
2049 /* okay, now prepare the report */
2050 syslog(LOG_DEBUG, "updating prometheus report for master process");
2051 last_updated = now_ms();
2052
2053 buf_printf(&report, "# HELP %s %s\n",
2054 "cyrus_master_ready_workers",
2055 "The number of ready workers");
2056 buf_appendcstr(&report, "# TYPE cyrus_master_ready_workers gauge\n");
2057 for (i = 0; i < nservices; i++) {
2058 const struct service *s = &Services[i];
2059 buf_printf(&report, "cyrus_master_ready_workers{service=\"%s\",family=\"%s\"}",
2060 s->name, s->familyname);
2061 buf_printf(&report, " %d %" PRId64 "\n",
2062 s->ready_workers, last_updated);
2063 }
2064
2065 buf_printf(&report, "# HELP %s %s\n",
2066 "cyrus_master_forks_total",
2067 "The number of children spawned");
2068 buf_appendcstr(&report, "# TYPE cyrus_master_forks_total counter\n");
2069 for (i = 0; i < nservices; i++) {
2070 const struct service *s = &Services[i];
2071 buf_printf(&report, "cyrus_master_forks_total{service=\"%s\",family=\"%s\"}",
2072 s->name, s->familyname);
2073 buf_printf(&report, " %d %" PRId64 "\n",
2074 s->nforks, last_updated);
2075 }
2076
2077 buf_printf(&report, "# HELP %s %s\n",
2078 "cyrus_master_active_children",
2079 "The number of children servicing clients");
2080 buf_appendcstr(&report, "# TYPE cyrus_master_active_children gauge\n");
2081 for (i = 0; i < nservices; i++) {
2082 const struct service *s = &Services[i];
2083 buf_printf(&report, "cyrus_master_active_children{service=\"%s\",family=\"%s\"}",
2084 s->name, s->familyname);
2085 buf_printf(&report, " %d %" PRId64 "\n",
2086 s->nactive, last_updated);
2087 }
2088
2089 buf_printf(&report, "# HELP %s %s\n",
2090 "cyrus_master_max_children",
2091 "The maximum number of child processes");
2092 buf_appendcstr(&report, "# TYPE cyrus_master_max_children gauge\n");
2093 for (i = 0; i < nservices; i++) {
2094 const struct service *s = &Services[i];
2095 buf_printf(&report, "cyrus_master_max_children{service=\"%s\",family=\"%s\"}",
2096 s->name, s->familyname);
2097 buf_printf(&report, " %d %" PRId64 "\n",
2098 s->max_workers, last_updated);
2099 }
2100
2101 /* XXX what is nconnections? */
2102
2103 buf_printf(&report, "# HELP %s %s\n",
2104 "cyrus_master_forks_per_second",
2105 "The rate at which we're spawning children");
2106 buf_appendcstr(&report, "# TYPE cyrus_master_forks_per_second gauge\n");
2107 for (i = 0; i < nservices; i++) {
2108 const struct service *s = &Services[i];
2109 buf_printf(&report, "cyrus_master_forks_per_second{service=\"%s\",family=\"%s\"}",
2110 s->name, s->familyname);
2111 buf_printf(&report, " %g %" PRId64 "\n",
2112 s->forkrate, last_updated);
2113 }
2114
2115 buf_printf(&report, "# HELP %s %s\n",
2116 "cyrus_master_max_forks_per_second",
2117 "The maximum rate at which we will spawn children");
2118 buf_appendcstr(&report, "# TYPE cyrus_master_max_forks_per_second gauge\n");
2119 for (i = 0; i < nservices; i++) {
2120 const struct service *s = &Services[i];
2121 buf_printf(&report, "cyrus_master_max_forks_per_second{service=\"%s\",family=\"%s\"}",
2122 s->name, s->familyname);
2123 buf_printf(&report, " %u %" PRId64 "\n",
2124 s->maxforkrate, last_updated);
2125 }
2126
2127 buf_printf(&report, "# HELP %s %s\n",
2128 "cyrus_master_ready_fails_total",
2129 "The number of failures in READY state");
2130 buf_appendcstr(&report, "# TYPE cyrus_master_ready_fails_total counter\n");
2131 for (i = 0; i < nservices; i++) {
2132 const struct service *s = &Services[i];
2133 buf_printf(&report, "cyrus_master_ready_fails_total{service=\"%s\",family=\"%s\"}",
2134 s->name, s->familyname);
2135 buf_printf(&report, " %d %" PRId64 "\n",
2136 s->nreadyfails, last_updated);
2137 }
2138
2139 /* write it out */
2140 retry_write(fd, buf_cstring(&report), buf_len(&report));
2141 ftruncate(fd, buf_len(&report));
2142 lock_unlock(fd, prom_report_fname);
2143 close(fd);
2144
2145 prom_prev_report = now;
2146
2147 buf_free(&report);
2148 }
2149
reread_conf(struct timeval now)2150 static void reread_conf(struct timeval now)
2151 {
2152 int i,j;
2153 struct event *ptr;
2154 struct centry *c;
2155
2156 /* disable all services -
2157 they will be re-enabled if they appear in config file */
2158 for (i = 0; i < nservices; i++) service_forget_exec(&Services[i]);
2159
2160 /* read services */
2161 masterconf_getsection("SERVICES", &add_service, (void*) 1);
2162 masterconf_getsection("DAEMON", &add_daemon, (void *)1);
2163
2164 for (i = 0; i < nservices; i++) {
2165 /* Send SIGHUP to all children:
2166 * - for services being added, there are still no children
2167 * - for services being disabled, we need to terminate the children
2168 * - otherwise (remaining services) we want to recycle children
2169 * Note that for services being disabled, it is important to first
2170 * signal them before shutting down their socket.
2171 */
2172 for (j = 0 ; j < child_table_size ; j++ ) {
2173 c = ctable[j];
2174 while (c != NULL) {
2175 if ((c->si == i) &&
2176 (c->service_state != SERVICE_STATE_DEAD)) {
2177 kill(c->pid, SIGHUP);
2178 c->sighuptime = time(NULL);
2179 }
2180 c = c->next;
2181 }
2182 }
2183
2184 if (!Services[i].exec && (Services[i].socket >= 0)) {
2185 /* cleanup newly disabled services */
2186
2187 if (verbose > 2)
2188 syslog(LOG_DEBUG, "disable: service %s/%s socket %d pipe %d %d",
2189 Services[i].name, Services[i].familyname,
2190 Services[i].socket,
2191 Services[i].stat[0], Services[i].stat[1]);
2192
2193 /* Only free the service info on the primary */
2194 if(Services[i].associate == 0) {
2195 free(Services[i].listen);
2196 free(Services[i].proto);
2197 }
2198 Services[i].listen = NULL;
2199 Services[i].proto = NULL;
2200 Services[i].desired_workers = 0;
2201
2202 /* close all listeners */
2203 shutdown(Services[i].socket, SHUT_RDWR);
2204 xclose(Services[i].socket);
2205 }
2206 else if (Services[i].exec && (Services[i].socket < 0)) {
2207 /* initialize new services */
2208
2209 service_create(&Services[i], 0);
2210 if (verbose > 2)
2211 syslog(LOG_DEBUG, "init: service %s/%s socket %d pipe %d %d",
2212 Services[i].name, Services[i].familyname,
2213 Services[i].socket,
2214 Services[i].stat[0], Services[i].stat[1]);
2215 }
2216 }
2217
2218 /* remove existing events */
2219 while (schedule) {
2220 ptr = schedule;
2221 schedule = schedule->next;
2222 event_free(ptr);
2223 }
2224 schedule = NULL;
2225
2226 /* read events */
2227 masterconf_getsection("EVENTS", &add_event, (void*) 1);
2228
2229 /* reinit child janitor */
2230 init_janitor(now);
2231
2232 /* reinit prom report */
2233 init_prom_report(now);
2234
2235 /* send some feedback to admin */
2236 syslog(LOG_NOTICE,
2237 "Services reconfigured. %d out of %d (max %d) services structures are now in use",
2238 nservices, allocservices, SERVICE_MAX);
2239 }
2240
main(int argc,char ** argv)2241 int main(int argc, char **argv)
2242 {
2243 static const char lock_suffix[] = ".lock";
2244
2245 const char *pidfile = MASTER_PIDFILE;
2246 char *pidfile_lock = NULL;
2247
2248 int startup_pipe[2] = { -1, -1 };
2249 int pidlock_fd = -1;
2250
2251 int i, opt, close_std = 1, daemon_mode = 0;
2252 const char *error_log = NULL;
2253 extern char *optarg;
2254
2255 char *alt_config = NULL;
2256
2257 int fd;
2258 fd_set rfds;
2259 char *p = NULL;
2260 int r = 0;
2261
2262 struct timeval now;
2263
2264 p = getenv("CYRUS_VERBOSE");
2265 if (p) verbose = atoi(p) + 1;
2266 while ((opt = getopt(argc, argv, "C:L:M:p:l:Ddj:vV")) != EOF) {
2267 switch (opt) {
2268 case 'C': /* alt imapd.conf file */
2269 alt_config = optarg;
2270 break;
2271 case 'M': /* alt cyrus.conf file */
2272 MASTER_CONFIG_FILENAME = optarg;
2273 break;
2274 case 'l':
2275 /* user defined listen queue backlog */
2276 listen_queue_backlog = atoi(optarg);
2277 break;
2278 case 'p':
2279 /* Set the pidfile name */
2280 pidfile = optarg;
2281 break;
2282 case 'd':
2283 /* Daemon Mode */
2284 daemon_mode = 1;
2285 break;
2286 case 'D':
2287 /* Debug Mode */
2288 close_std = 0;
2289 break;
2290 case 'L':
2291 /* error log */
2292 error_log = optarg;
2293 break;
2294 case 'j':
2295 /* Janitor frequency */
2296 janitor_frequency = atoi(optarg);
2297 if(janitor_frequency < 1)
2298 fatal("The janitor period must be at least 1 second", EX_CONFIG);
2299 break;
2300 case 'v':
2301 verbose++;
2302 break;
2303 case 'V':
2304 /* print version information and exit */
2305 printf("%s %s\n", PACKAGE_NAME, CYRUS_VERSION);
2306 return 0;
2307 default:
2308 break;
2309 }
2310 }
2311
2312 if (daemon_mode && !close_std)
2313 fatal("Unable to be both debug and daemon mode", EX_CONFIG);
2314
2315 /* we reserve fds for children to communicate with us, so they
2316 better be available. */
2317 for (fd = STATUS_FD; fd <= LISTEN_FD; fd++) {
2318 close(fd);
2319 if (dup(0) != fd) fatalf(2, "couldn't dup fd 0: %m");
2320 }
2321
2322 masterconf_init("master", alt_config);
2323
2324 if (close_std || error_log) {
2325 /* close stdin/out/err */
2326 for (fd = 0; fd < 3; fd++) {
2327 const char *file = (error_log && fd > 0 ?
2328 error_log : "/dev/null");
2329 int mode = (fd > 0 ? O_WRONLY : O_RDWR) |
2330 (error_log && fd > 0 ? O_CREAT|O_APPEND : 0);
2331 close(fd);
2332 if (open(file, mode, 0666) != fd)
2333 fatalf(2, "couldn't open %s: %m", file);
2334 }
2335 }
2336
2337 /* Pidfile Algorithm in Daemon Mode. This is a little subtle because
2338 * we want to ensure that we can report an error to our parent if the
2339 * child fails to lock the pidfile.
2340 *
2341 * [A] Create/lock pidfile.lock. If locked, exit(failure).
2342 * [A] Create a pipe
2343 * [A] Fork [B]
2344 * [A] Block on reading exit code from pipe
2345 * [B] Create/lock pidfile. If locked, write failure code to pipe and
2346 * exit(failure)
2347 * [B] write pid to pidfile
2348 * [B] write success code to pipe & finish starting up
2349 * [A] unlink pidfile.lock and exit(code read from pipe)
2350 *
2351 */
2352 if(daemon_mode) {
2353 /* Daemonize */
2354 pid_t pid = -1;
2355
2356 pidfile_lock = strconcat(pidfile, lock_suffix, (char *)NULL);
2357
2358 pidlock_fd = open(pidfile_lock, O_CREAT|O_TRUNC|O_RDWR, 0644);
2359 if(pidlock_fd == -1) {
2360 syslog(LOG_ERR, "can't open pidfile lock: %s (%m)", pidfile_lock);
2361 exit(EX_OSERR);
2362 } else {
2363 if(lock_nonblocking(pidlock_fd, pidfile)) {
2364 syslog(LOG_ERR, "can't get exclusive lock on %s",
2365 pidfile_lock);
2366 exit(EX_TEMPFAIL);
2367 }
2368 }
2369
2370 if(pipe(startup_pipe) == -1) {
2371 syslog(LOG_ERR, "can't create startup pipe (%m)");
2372 exit(EX_OSERR);
2373 }
2374
2375 /* Set the current working directory where cores can go to die. */
2376 const char *path = config_getstring(IMAPOPT_CONFIGDIRECTORY);
2377 if (path == NULL) {
2378 path = getenv("TMPDIR");
2379 if (path == NULL)
2380 path = "/tmp";
2381 }
2382 if (chdir(path))
2383 fatalf(2, "couldn't chdir to %s: %m", path);
2384 r = chdir("cores");
2385
2386 do {
2387 pid = fork();
2388
2389 if ((pid == -1) && (errno == EAGAIN)) {
2390 syslog(LOG_WARNING, "master fork failed (sleeping): %m");
2391 sleep(5);
2392 }
2393 } while ((pid == -1) && (errno == EAGAIN));
2394
2395 if (pid == -1) {
2396 fatal("fork error", EX_OSERR);
2397 } else if (pid != 0) {
2398 int exit_code;
2399
2400 /* Parent, wait for child */
2401 if(read(startup_pipe[0], &exit_code, sizeof(exit_code)) == -1) {
2402 syslog(LOG_ERR, "could not read from startup_pipe (%m)");
2403 unlink(pidfile_lock);
2404 exit(EX_OSERR);
2405 } else {
2406 unlink(pidfile_lock);
2407 exit(exit_code);
2408 }
2409 }
2410
2411 /* Child! */
2412 close(startup_pipe[0]);
2413
2414 free(pidfile_lock);
2415
2416 /*
2417 * We're now running in the child. Lose our controlling terminal
2418 * and obtain a new process group.
2419 */
2420 if (setsid() == -1) {
2421 int exit_result = EX_OSERR;
2422
2423 /* Tell our parent that we failed. */
2424 if (write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2425 syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2426 }
2427
2428 fatal("setsid failure", EX_OSERR);
2429 }
2430 }
2431
2432 /* Write out the pidfile */
2433 pidfd = open(pidfile, O_CREAT|O_RDWR, 0644);
2434 if(pidfd == -1) {
2435 int exit_result = EX_OSERR;
2436
2437 syslog(LOG_ERR, "can't open pidfile: %m");
2438
2439 /* Tell our parent that we failed. */
2440 if (daemon_mode && write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2441 syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2442 }
2443
2444 exit(EX_OSERR);
2445 } else {
2446 char buf[100];
2447
2448 if(lock_nonblocking(pidfd, pidfile)) {
2449 int exit_result = EX_OSERR;
2450
2451 /* Tell our parent that we failed. */
2452 if (write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2453 syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2454 }
2455
2456 fatal("cannot get exclusive lock on pidfile (is another master still running?)", EX_OSERR);
2457 } else {
2458 int pidfd_flags = fcntl(pidfd, F_GETFD, 0);
2459 if (pidfd_flags != -1)
2460 pidfd_flags = fcntl(pidfd, F_SETFD,
2461 pidfd_flags | FD_CLOEXEC);
2462 if (pidfd_flags == -1) {
2463 int exit_result = EX_OSERR;
2464
2465 syslog(LOG_ERR, "unable to set close-on-exec for pidfile: %m");
2466
2467 /* Tell our parent that we failed. */
2468 if (write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2469 syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2470 }
2471
2472 fatalf(EX_OSERR, "unable to set close-on-exec for pidfile (see syslog for details)");
2473 }
2474
2475 /* Write PID */
2476 snprintf(buf, sizeof(buf), "%lu\n", (unsigned long int)getpid());
2477 if(lseek(pidfd, 0, SEEK_SET) == -1 ||
2478 ftruncate(pidfd, 0) == -1 ||
2479 write(pidfd, buf, strlen(buf)) == -1) {
2480 int exit_result = EX_OSERR;
2481
2482 syslog(LOG_ERR, "unable to write to pidfile: %m");
2483
2484 /* Tell our parent that we failed. */
2485 if (daemon_mode && write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1) {
2486 syslog(LOG_ERR, "can't write to startup parent pipe: %m");
2487 }
2488
2489 fatalf(EX_OSERR, "unable to write to pidfile (see syslog for details)");
2490 }
2491 if (fsync(pidfd))
2492 fatalf(EX_OSERR, "unable to sync pidfile: %m");
2493 }
2494 }
2495
2496 if(daemon_mode) {
2497 int exit_result = 0;
2498
2499 /* success! */
2500 if (write(startup_pipe[1], &exit_result, sizeof(exit_result)) == -1)
2501 fatalf(EX_OSERR,
2502 "could not write success result to startup pipe (%m)");
2503
2504 close(startup_pipe[1]);
2505 xclose(pidlock_fd);
2506 }
2507
2508 syslog(LOG_DEBUG, "process started");
2509
2510 #if defined(__linux__) && defined(HAVE_LIBCAP)
2511 if (become_cyrus(/*is_master*/1) != 0) {
2512 syslog(LOG_ERR, "can't change to the cyrus user: %m");
2513 exit(1);
2514 }
2515 #endif
2516
2517 masterconf_getsection("START", &add_start, NULL);
2518 masterconf_getsection("SERVICES", &add_service, NULL);
2519 masterconf_getsection("EVENTS", &add_event, NULL);
2520 masterconf_getsection("DAEMON", &add_daemon, NULL);
2521
2522 /* set signal handlers */
2523 sighandler_setup();
2524
2525 /* initialize services */
2526 for (i = 0; i < nservices; i++) {
2527 service_create(&Services[i], 1);
2528 if (verbose > 2)
2529 syslog(LOG_DEBUG, "init: service %s/%s socket %d pipe %d %d",
2530 Services[i].name, Services[i].familyname,
2531 Services[i].socket,
2532 Services[i].stat[0], Services[i].stat[1]);
2533 }
2534
2535 #if !defined(__linux__) || !defined(HAVE_LIBCAP)
2536 if (become_cyrus(/*is_master*/1) != 0) {
2537 syslog(LOG_ERR, "can't change to the cyrus user: %m");
2538 exit(1);
2539 }
2540 #endif
2541
2542 /* init ctable janitor */
2543 gettimeofday(&now, 0);
2544 init_janitor(now);
2545
2546 /* init prom report */
2547 init_prom_report(now);
2548
2549 /* ok, we're going to start spawning like mad now */
2550 syslog(LOG_DEBUG, "ready for work");
2551
2552 for (;;) {
2553 int i, maxfd, ready_fds, total_children = 0;
2554 struct timeval tv, *tvptr;
2555 struct notify_message msg;
2556
2557 if (gotsigquit) {
2558 gotsigquit = 0;
2559 begin_shutdown();
2560 }
2561
2562 /* run any scheduled processes */
2563 if (!in_shutdown)
2564 spawn_schedule(now);
2565
2566 /* reap first, that way if we need to babysit we will */
2567 if (gotsigchld) {
2568 /* order matters here */
2569 gotsigchld = 0;
2570 reap_child();
2571 }
2572
2573 /* do we have any services undermanned? */
2574 for (i = 0; i < nservices; i++) {
2575 total_children += Services[i].nactive;
2576 if (!in_shutdown) {
2577 if (Services[i].exec /* enabled */ &&
2578 (Services[i].nactive < Services[i].max_workers) &&
2579 (Services[i].ready_workers < Services[i].desired_workers))
2580 {
2581 /* bring us up to desired_workers */
2582 int j = Services[i].desired_workers - Services[i].ready_workers;
2583
2584 if (verbose) {
2585 syslog(LOG_DEBUG, "service %s/%s needs %d more ready workers",
2586 Services[i].name, Services[i].familyname, j);
2587 }
2588
2589 while (j-- > 0) {
2590 spawn_service(i);
2591 }
2592 } else if (Services[i].exec
2593 && Services[i].babysit
2594 && Services[i].nactive == 0) {
2595 syslog(LOG_ERR,
2596 "lost all children for service: %s/%s. " \
2597 "Applying babysitter.",
2598 Services[i].name, Services[i].familyname);
2599 spawn_service(i);
2600 } else if (!Services[i].exec /* disabled */ &&
2601 Services[i].name /* not yet removed */ &&
2602 Services[i].nactive == 0) {
2603 if (verbose > 2)
2604 syslog(LOG_DEBUG, "remove: service %s/%s pipe %d %d",
2605 Services[i].name, Services[i].familyname,
2606 Services[i].stat[0], Services[i].stat[1]);
2607
2608 /* Only free the service info on the primary */
2609 if (Services[i].associate == 0) {
2610 free(Services[i].name);
2611 }
2612 Services[i].name = NULL;
2613 Services[i].nforks = 0;
2614 Services[i].nactive = 0;
2615 Services[i].nconnections = 0;
2616 Services[i].associate = 0;
2617
2618 xclose(Services[i].stat[0]);
2619 xclose(Services[i].stat[1]);
2620 }
2621 }
2622 }
2623
2624 if (in_shutdown && total_children == 0) {
2625 syslog(LOG_NOTICE, "All children have exited, closing down");
2626 exit(0);
2627 }
2628
2629 if (gotsighup) {
2630 syslog(LOG_NOTICE, "got SIGHUP");
2631 gotsighup = 0;
2632 reread_conf(now);
2633 }
2634
2635 FD_ZERO(&rfds);
2636 maxfd = 0;
2637 for (i = 0; i < nservices; i++) {
2638 int x = Services[i].stat[0];
2639
2640 int y = Services[i].socket;
2641
2642 /* messages */
2643 if (x >= 0) {
2644 if (verbose > 2)
2645 syslog(LOG_DEBUG, "listening for messages from %s/%s",
2646 Services[i].name, Services[i].familyname);
2647 FD_SET(x, &rfds);
2648 }
2649 if (x > maxfd) maxfd = x;
2650
2651 /* connections */
2652 if (y >= 0 && Services[i].ready_workers == 0 &&
2653 Services[i].nactive < Services[i].max_workers &&
2654 !service_is_fork_limited(&Services[i])) {
2655 if (verbose > 2)
2656 syslog(LOG_DEBUG, "listening for connections for %s/%s",
2657 Services[i].name, Services[i].familyname);
2658 FD_SET(y, &rfds);
2659 if (y > maxfd) maxfd = y;
2660 }
2661
2662 /* paranoia */
2663 if (Services[i].ready_workers < 0) {
2664 syslog(LOG_ERR, "%s/%s has %d workers?!?", Services[i].name,
2665 Services[i].familyname, Services[i].ready_workers);
2666 }
2667 }
2668 maxfd++; /* need 1 greater than maxfd */
2669
2670 int interrupted = 0;
2671 do {
2672 /* how long to wait? - do now so that any scheduled wakeup
2673 * calls get accounted for*/
2674 gettimeofday(&now, 0);
2675 tvptr = NULL;
2676 if (schedule && !in_shutdown) {
2677 double delay = timesub(&now, &schedule->mark);
2678 if (!interrupted && delay > 0.0) {
2679 timeval_set_double(&tv, delay);
2680 }
2681 else {
2682 tv.tv_sec = 0;
2683 tv.tv_usec = 0;
2684 }
2685 tvptr = &tv;
2686 }
2687
2688 errno = 0;
2689 ready_fds = myselect(maxfd, &rfds, NULL, NULL, tvptr);
2690
2691 if (ready_fds < 0) {
2692 switch (errno) {
2693 case EAGAIN:
2694 case EINTR:
2695 /* Try again to get valid rfds, this time without blocking so we
2696 * will definitely process messages without getting interrupted
2697 * again. */
2698 interrupted++;
2699 if (interrupted > 5) {
2700 syslog(LOG_WARNING, "Repeatedly interrupted, too many signals?");
2701 /* Fake a timeout */
2702 ready_fds = 0;
2703 FD_ZERO(&rfds);
2704 }
2705 break;
2706 default:
2707 /* uh oh */
2708 fatalf(1, "select failed: %m");
2709 }
2710 }
2711 } while (!in_shutdown && ready_fds < 0);
2712
2713 if (ready_fds > 0) {
2714 for (i = 0; i < nservices; i++) {
2715 int x = Services[i].stat[0];
2716 int y = Services[i].socket;
2717
2718 if ((x >= 0) && FD_ISSET(x, &rfds)) {
2719 while ((r = read_msg(x, &msg)) == 0)
2720 process_msg(i, &msg);
2721
2722 if (r == 2) {
2723 syslog(LOG_ERR,
2724 "got incorrectly sized response from child: %x", i);
2725 continue;
2726 }
2727 if (r < 0) {
2728 syslog(LOG_ERR,
2729 "error while receiving message from child %x: %m", i);
2730 continue;
2731 }
2732 }
2733
2734 if (!in_shutdown && Services[i].exec &&
2735 Services[i].nactive < Services[i].max_workers &&
2736 Services[i].ready_workers == 0 &&
2737 y >= 0 && FD_ISSET(y, &rfds))
2738 {
2739 /* huh, someone wants to talk to us */
2740 spawn_service(i);
2741 }
2742 }
2743 }
2744
2745 gettimeofday(&now, 0);
2746 child_janitor(now);
2747 do_prom_report(now);
2748 }
2749
2750 /* never reached */
2751 return r;
2752 }
2753