1 /* $NetBSD: statd.c,v 1.20 2001/11/23 17:10:29 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Christos Zoulas. All rights reserved. 5 * Copyright (c) 1995 6 * A.R. Gordon (andrew.gordon@net-tel.co.uk). All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed for the FreeBSD project 19 * This product includes software developed by Christos Zoulas. 20 * 4. Neither the name of the author nor the names of any co-contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY ANDREW GORDON AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 */ 37 38 #include <sys/cdefs.h> 39 #ifndef lint 40 __RCSID("$NetBSD: statd.c,v 1.20 2001/11/23 17:10:29 christos Exp $"); 41 #endif 42 43 /* main() function for status monitor daemon. Some of the code in this */ 44 /* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x */ 45 /* The actual program logic is in the file procs.c */ 46 47 #include <sys/param.h> 48 #include <sys/wait.h> 49 50 #include <err.h> 51 #include <ctype.h> 52 #include <errno.h> 53 #include <fcntl.h> 54 #include <signal.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <syslog.h> 59 #include <unistd.h> 60 #include <util.h> 61 #include <db.h> 62 #include <netconfig.h> 63 64 #include <rpc/rpc.h> 65 66 #include "statd.h" 67 68 struct sigaction sa; 69 int debug = 0; /* Controls syslog() for debug msgs */ 70 int _rpcsvcdirty = 0; /* XXX ??? */ 71 static DB *db; /* Database file */ 72 73 Header status_info; 74 75 static char undefdata[] = "\0\1\2\3\4\5\6\7"; 76 static DBT undefkey = { 77 undefdata, 78 sizeof(undefdata) 79 }; 80 81 82 /* statd.c */ 83 static int walk_one __P((int (*fun )__P ((DBT *, HostInfo *, void *)), DBT *, DBT *, void *)); 84 static int walk_db __P((int (*fun )__P ((DBT *, HostInfo *, void *)), void *)); 85 static int reset_host __P((DBT *, HostInfo *, void *)); 86 static int check_work __P((DBT *, HostInfo *, void *)); 87 static int unmon_host __P((DBT *, HostInfo *, void *)); 88 static int notify_one __P((DBT *, HostInfo *, void *)); 89 static void init_file __P((char *)); 90 static int notify_one_host __P((char *)); 91 static void die __P((int)) __attribute__((__noreturn__)); 92 93 int main __P((int, char **)); 94 95 int 96 main(argc, argv) 97 int argc; 98 char **argv; 99 { 100 int ch; 101 struct sigaction nsa; 102 103 sigemptyset(&nsa.sa_mask); 104 nsa.sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT; 105 nsa.sa_handler = SIG_IGN; 106 (void)sigaction(SIGCHLD, &nsa, NULL); 107 108 while ((ch = getopt(argc, argv, "d")) != (-1)) { 109 switch (ch) { 110 case 'd': 111 debug = 1; 112 break; 113 default: 114 case '?': 115 (void)fprintf(stderr, "usage: %s [-d]\n", 116 getprogname()); 117 exit(1); 118 /* NOTREACHED */ 119 } 120 } 121 (void)rpcb_unset(SM_PROG, SM_VERS, NULL); 122 123 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "udp")) { 124 errx(1, "cannot create udp service."); 125 /* NOTREACHED */ 126 } 127 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "tcp")) { 128 errx(1, "cannot create udp service."); 129 /* NOTREACHED */ 130 } 131 132 init_file("/var/db/statd.status"); 133 134 /* 135 * Note that it is NOT sensible to run this program from inetd - the 136 * protocol assumes that it will run immediately at boot time. 137 */ 138 if (!debug) 139 daemon(0, 0); 140 pidfile(NULL); 141 openlog("rpc.statd", 0, LOG_DAEMON); 142 if (debug) 143 syslog(LOG_INFO, "Starting - debug enabled"); 144 else 145 syslog(LOG_INFO, "Starting"); 146 147 sa.sa_handler = die; 148 sa.sa_flags = 0; 149 sigemptyset(&sa.sa_mask); 150 (void)sigaction(SIGTERM, &sa, NULL); 151 (void)sigaction(SIGQUIT, &sa, NULL); 152 (void)sigaction(SIGHUP, &sa, NULL); 153 (void)sigaction(SIGINT, &sa, NULL); 154 155 sa.sa_handler = SIG_IGN; 156 sa.sa_flags = SA_RESTART; 157 sigemptyset(&sa.sa_mask); 158 sigaddset(&sa.sa_mask, SIGALRM); 159 160 /* Initialisation now complete - start operating */ 161 162 /* Notify hosts that need it */ 163 notify_handler(0); 164 165 while (1) 166 svc_run(); /* Should never return */ 167 die(0); 168 } 169 170 /* notify_handler ---------------------------------------------------------- */ 171 /* 172 * Purpose: Catch SIGALRM and collect process status 173 * Returns: Nothing. 174 * Notes: No special action required, other than to collect the 175 * process status and hence allow the child to die: 176 * we only use child processes for asynchronous transmission 177 * of SM_NOTIFY to other systems, so it is normal for the 178 * children to exit when they have done their work. 179 */ 180 void 181 notify_handler(sig) 182 int sig; 183 { 184 time_t now; 185 186 NO_ALARM; 187 sa.sa_handler = SIG_IGN; 188 (void)sigaction(SIGALRM, &sa, NULL); 189 190 now = time(NULL); 191 192 (void) walk_db(notify_one, &now); 193 194 if (walk_db(check_work, &now) == 0) { 195 /* 196 * No more work to be done. 197 */ 198 CLR_ALARM; 199 return; 200 } 201 sync_file(); 202 ALARM; 203 alarm(5); 204 } 205 206 /* sync_file --------------------------------------------------------------- */ 207 /* 208 * Purpose: Packaged call of msync() to flush changes to mmap()ed file 209 * Returns: Nothing. Errors to syslog. 210 */ 211 void 212 sync_file() 213 { 214 DBT data; 215 216 data.data = &status_info; 217 data.size = sizeof(status_info); 218 switch ((*db->put)(db, &undefkey, &data, 0)) { 219 case 0: 220 return; 221 case -1: 222 goto bad; 223 default: 224 abort(); 225 } 226 if ((*db->sync)(db, 0) == -1) { 227 bad: 228 syslog(LOG_ERR, "database corrupted %m"); 229 die(1); 230 } 231 } 232 233 /* change_host -------------------------------------------------------------- */ 234 /* 235 * Purpose: Update/Create an entry for host 236 * Returns: Nothing 237 * Notes: 238 * 239 */ 240 void 241 change_host(hostname, hp) 242 char *hostname; 243 HostInfo *hp; 244 { 245 DBT key, data; 246 char *ptr; 247 248 for (ptr = hostname; *ptr; ptr++) 249 if (isupper((unsigned char) *ptr)) 250 *ptr = tolower((unsigned char) *ptr); 251 252 key.data = hostname; 253 key.size = ptr - hostname + 1; 254 data.data = hp; 255 data.size = sizeof(*hp); 256 257 switch ((*db->put)(db, &key, &data, 0)) { 258 case -1: 259 syslog(LOG_ERR, "database corrupted %m"); 260 die(1); 261 case 0: 262 return; 263 default: 264 abort(); 265 } 266 } 267 268 269 /* find_host -------------------------------------------------------------- */ 270 /* 271 * Purpose: Find the entry in the status file for a given host 272 * Returns: Copy of entry in hd, or NULL 273 * Notes: 274 * 275 */ 276 HostInfo * 277 find_host(hostname, hp) 278 char *hostname; 279 HostInfo *hp; 280 { 281 DBT key, data; 282 char *ptr; 283 284 for (ptr = hostname; *ptr; ptr++) 285 if (isupper((unsigned char) *ptr)) 286 *ptr = tolower((unsigned char) *ptr); 287 288 key.data = hostname; 289 key.size = ptr - hostname + 1; 290 switch ((*db->get)(db, &key, &data, 0)) { 291 case 0: 292 if (data.size != sizeof(*hp)) 293 goto bad; 294 return memcpy(hp, data.data, sizeof(*hp)); 295 case 1: 296 return NULL; 297 case -1: 298 goto bad; 299 default: 300 abort(); 301 } 302 303 bad: 304 syslog(LOG_ERR, "Database corrupted %m"); 305 return NULL; 306 } 307 308 /* walk_one ------------------------------------------------------------- */ 309 /* 310 * Purpose: Call the given function if the element is valid 311 * Returns: Nothing - exits on error 312 * Notes: 313 */ 314 static int 315 walk_one(fun, key, data, ptr) 316 int (*fun) __P((DBT *, HostInfo *, void *)); 317 DBT *key, *data; 318 void *ptr; 319 { 320 HostInfo h; 321 if (key->size == undefkey.size && 322 memcmp(key->data, undefkey.data, key->size) == 0) 323 return 0; 324 if (data->size != sizeof(HostInfo)) { 325 syslog(LOG_ERR, "Bad data in database"); 326 die(1); 327 } 328 memcpy(&h, data->data, sizeof(h)); 329 return (*fun)(key, &h, ptr); 330 } 331 332 /* walk_db -------------------------------------------------------------- */ 333 /* 334 * Purpose: Iterate over all elements calling the given function 335 * Returns: -1 if function failed, 0 on success 336 * Notes: 337 */ 338 static int 339 walk_db(fun, ptr) 340 int (*fun) __P((DBT *, HostInfo *, void *)); 341 void *ptr; 342 { 343 DBT key, data; 344 345 switch ((*db->seq)(db, &key, &data, R_FIRST)) { 346 case -1: 347 goto bad; 348 case 1: 349 /* We should have at least the magic entry at this point */ 350 abort(); 351 case 0: 352 if (walk_one(fun, &key, &data, ptr) == -1) 353 return -1; 354 break; 355 default: 356 abort(); 357 } 358 359 360 for (;;) 361 switch ((*db->seq)(db, &key, &data, R_NEXT)) { 362 case -1: 363 goto bad; 364 case 0: 365 if (walk_one(fun, &key, &data, ptr) == -1) 366 return -1; 367 break; 368 case 1: 369 return 0; 370 default: 371 abort(); 372 } 373 bad: 374 syslog(LOG_ERR, "Corrupted database %m"); 375 die(1); 376 } 377 378 /* reset_host ------------------------------------------------------------ */ 379 /* 380 * Purpose: Clean up existing hosts in file. 381 * Returns: Always success 0. 382 * Notes: Clean-up of existing file - monitored hosts will have a 383 * pointer to a list of clients, which refers to memory in 384 * the previous incarnation of the program and so are 385 * meaningless now. These pointers are zeroed and the fact 386 * that the host was previously monitored is recorded by 387 * setting the notifyReqd flag, which will in due course 388 * cause a SM_NOTIFY to be sent. 389 * 390 * Note that if we crash twice in quick succession, some hosts 391 * may already have notifyReqd set, where we didn't manage to 392 * notify them before the second crash occurred. 393 */ 394 static int 395 reset_host(key, hi, ptr) 396 DBT *key; 397 HostInfo *hi; 398 void *ptr; 399 { 400 401 if (hi->monList) { 402 hi->notifyReqd = *(time_t *) ptr; 403 hi->attempts = 0; 404 hi->monList = NULL; 405 } 406 return 0; 407 } 408 409 /* check_work ------------------------------------------------------------ */ 410 /* 411 * Purpose: Check if there is work to be done. 412 * Returns: 0 if there is no work to be done -1 if there is. 413 * Notes: 414 */ 415 static int 416 check_work(key, hi, ptr) 417 DBT *key; 418 HostInfo *hi; 419 void *ptr; 420 { 421 return hi->notifyReqd ? -1 : 0; 422 } 423 424 /* unmon_host ------------------------------------------------------------ */ 425 /* 426 * Purpose: Unmonitor a host 427 * Returns: 0 428 * Notes: 429 */ 430 static int 431 unmon_host(key, hi, ptr) 432 DBT *key; 433 HostInfo *hi; 434 void *ptr; 435 { 436 char *name = key->data; 437 438 if (do_unmon(name, hi, ptr)) 439 change_host(name, hi); 440 return 0; 441 } 442 443 /* notify_one ------------------------------------------------------------ */ 444 /* 445 * Purpose: Notify one host. 446 * Returns: 0 if success -1 on failure 447 * Notes: 448 */ 449 static int 450 notify_one(key, hi, ptr) 451 DBT *key; 452 HostInfo *hi; 453 void *ptr; 454 { 455 time_t now = *(time_t *) ptr; 456 char *name = key->data; 457 DBT data; 458 459 if (hi->notifyReqd == 0 || hi->notifyReqd > now) 460 return 0; 461 462 if (notify_one_host(name)) { 463 give_up: 464 hi->notifyReqd = 0; 465 hi->attempts = 0; 466 data.data = hi; 467 data.size = sizeof(*hi); 468 switch ((*db->put)(db, key, &data, 0)) { 469 case -1: 470 syslog(LOG_ERR, "Error storing %s (%m)", name); 471 case 0: 472 return 0; 473 474 default: 475 abort(); 476 } 477 } 478 else { 479 /* 480 * If one of the initial attempts fails, we wait 481 * for a while and have another go. This is necessary 482 * because when we have crashed, (eg. a power outage) 483 * it is quite possible that we won't be able to 484 * contact all monitored hosts immediately on restart, 485 * either because they crashed too and take longer 486 * to come up (in which case the notification isn't 487 * really required), or more importantly if some 488 * router etc. needed to reach the monitored host 489 * has not come back up yet. In this case, we will 490 * be a bit late in re-establishing locks (after the 491 * grace period) but that is the best we can do. We 492 * try 10 times at 5 sec intervals, 10 more times at 493 * 1 minute intervals, then 24 more times at hourly 494 * intervals, finally giving up altogether if the 495 * host hasn't come back to life after 24 hours. 496 */ 497 if (hi->attempts++ >= 44) 498 goto give_up; 499 else if (hi->attempts < 10) 500 hi->notifyReqd += 5; 501 else if (hi->attempts < 20) 502 hi->notifyReqd += 60; 503 else 504 hi->notifyReqd += 60 * 60; 505 return -1; 506 } 507 } 508 509 /* init_file -------------------------------------------------------------- */ 510 /* 511 * Purpose: Open file, create if necessary, initialise it. 512 * Returns: Nothing - exits on error 513 * Notes: Called before process becomes daemon, hence logs to 514 * stderr rather than syslog. 515 * Opens the file, then mmap()s it for ease of access. 516 * Also performs initial clean-up of the file, zeroing 517 * monitor list pointers, setting the notifyReqd flag in 518 * all hosts that had a monitor list, and incrementing 519 * the state number to the next even value. 520 */ 521 static void 522 init_file(filename) 523 char *filename; 524 { 525 DBT data; 526 527 db = dbopen(filename, O_RDWR|O_CREAT|O_NDELAY|O_EXLOCK, 0644, DB_HASH, 528 NULL); 529 if (db == NULL) 530 err(1, "Cannot open `%s'", filename); 531 532 switch ((*db->get)(db, &undefkey, &data, 0)) { 533 case 1: 534 /* New database */ 535 (void)memset(&status_info, 0, sizeof(status_info)); 536 sync_file(); 537 return; 538 539 case -1: 540 err(1, "error accessing database (%m)"); 541 case 0: 542 /* Existing database */ 543 if (data.size != sizeof(status_info)) 544 errx(1, "database corrupted %lu != %lu", 545 (u_long)data.size, (u_long)sizeof(status_info)); 546 break; 547 default: 548 abort(); 549 } 550 551 reset_database(); 552 return; 553 } 554 555 /* reset_database --------------------------------------------------------- */ 556 /* 557 * Purpose: Clears the statd database 558 * Returns: Nothing 559 * Notes: If this is not called on reset, it will leak memory. 560 */ 561 void 562 reset_database() 563 { 564 time_t now = time(NULL); 565 walk_db(reset_host, &now); 566 567 /* Select the next higher even number for the state counter */ 568 status_info.ourState = 569 (status_info.ourState + 2) & 0xfffffffe; 570 status_info.ourState++; /* XXX - ??? */ 571 sync_file(); 572 } 573 574 /* unmon_hosts --------------------------------------------------------- */ 575 /* 576 * Purpose: Unmonitor all the hosts 577 * Returns: Nothing 578 * Notes: 579 */ 580 void 581 unmon_hosts() 582 { 583 time_t now = time(NULL); 584 walk_db(unmon_host, &now); 585 sync_file(); 586 } 587 588 static int 589 notify_one_host(hostname) 590 char *hostname; 591 { 592 struct timeval timeout = {20, 0}; /* 20 secs timeout */ 593 CLIENT *cli; 594 char dummy; 595 stat_chge arg; 596 char our_hostname[MAXHOSTNAMELEN + 1]; 597 598 gethostname(our_hostname, sizeof(our_hostname)); 599 our_hostname[sizeof(our_hostname) - 1] = '\0'; 600 our_hostname[SM_MAXSTRLEN] = '\0'; 601 arg.mon_name = our_hostname; 602 arg.state = status_info.ourState; 603 604 if (debug) 605 syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s", 606 hostname, our_hostname); 607 608 cli = clnt_create(hostname, SM_PROG, SM_VERS, "udp"); 609 if (!cli) { 610 syslog(LOG_ERR, "Failed to contact host %s%s", hostname, 611 clnt_spcreateerror("")); 612 return (FALSE); 613 } 614 if (clnt_call(cli, SM_NOTIFY, xdr_stat_chge, &arg, xdr_void, 615 &dummy, timeout) != RPC_SUCCESS) { 616 syslog(LOG_ERR, "Failed to contact rpc.statd at host %s", 617 hostname); 618 clnt_destroy(cli); 619 return (FALSE); 620 } 621 clnt_destroy(cli); 622 return (TRUE); 623 } 624 625 626 static void 627 die(n) 628 int n; 629 { 630 (*db->close)(db); 631 exit(n); 632 } 633