1 /* $NetBSD: statd.c,v 1.21 2002/11/08 00:16:39 fvdl Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Christos Zoulas. All rights reserved. 5 * Copyright (c) 1995 6 * A.R. Gordon (andrew.gordon@net-tel.co.uk). All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed for the FreeBSD project 19 * This product includes software developed by Christos Zoulas. 20 * 4. Neither the name of the author nor the names of any co-contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY ANDREW GORDON AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 */ 37 38 #include <sys/cdefs.h> 39 #ifndef lint 40 __RCSID("$NetBSD: statd.c,v 1.21 2002/11/08 00:16:39 fvdl Exp $"); 41 #endif 42 43 /* main() function for status monitor daemon. Some of the code in this */ 44 /* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x */ 45 /* The actual program logic is in the file procs.c */ 46 47 #include <sys/param.h> 48 #include <sys/wait.h> 49 50 #include <err.h> 51 #include <ctype.h> 52 #include <errno.h> 53 #include <fcntl.h> 54 #include <signal.h> 55 #include <stdio.h> 56 #include <stdlib.h> 57 #include <string.h> 58 #include <syslog.h> 59 #include <unistd.h> 60 #include <util.h> 61 #include <db.h> 62 #include <netconfig.h> 63 64 #include <rpc/rpc.h> 65 66 #include "statd.h" 67 68 struct sigaction sa; 69 int debug = 0; /* Controls syslog() for debug msgs */ 70 int _rpcsvcdirty = 0; /* XXX ??? */ 71 static DB *db; /* Database file */ 72 73 Header status_info; 74 75 static char undefdata[] = "\0\1\2\3\4\5\6\7"; 76 static DBT undefkey = { 77 undefdata, 78 sizeof(undefdata) 79 }; 80 81 82 /* statd.c */ 83 static int walk_one __P((int (*fun )__P ((DBT *, HostInfo *, void *)), DBT *, DBT *, void *)); 84 static int walk_db __P((int (*fun )__P ((DBT *, HostInfo *, void *)), void *)); 85 static int reset_host __P((DBT *, HostInfo *, void *)); 86 static int check_work __P((DBT *, HostInfo *, void *)); 87 static int unmon_host __P((DBT *, HostInfo *, void *)); 88 static int notify_one __P((DBT *, HostInfo *, void *)); 89 static void init_file __P((char *)); 90 static int notify_one_host __P((char *)); 91 static void die __P((int)) __attribute__((__noreturn__)); 92 93 int main __P((int, char **)); 94 95 int 96 main(argc, argv) 97 int argc; 98 char **argv; 99 { 100 int ch; 101 struct sigaction nsa; 102 int maxrec = RPC_MAXDATASIZE; 103 104 sigemptyset(&nsa.sa_mask); 105 nsa.sa_flags = SA_NOCLDSTOP|SA_NOCLDWAIT; 106 nsa.sa_handler = SIG_IGN; 107 (void)sigaction(SIGCHLD, &nsa, NULL); 108 109 while ((ch = getopt(argc, argv, "d")) != (-1)) { 110 switch (ch) { 111 case 'd': 112 debug = 1; 113 break; 114 default: 115 case '?': 116 (void)fprintf(stderr, "usage: %s [-d]\n", 117 getprogname()); 118 exit(1); 119 /* NOTREACHED */ 120 } 121 } 122 (void)rpcb_unset(SM_PROG, SM_VERS, NULL); 123 124 rpc_control(RPC_SVC_CONNMAXREC_SET, &maxrec); 125 126 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "udp")) { 127 errx(1, "cannot create udp service."); 128 /* NOTREACHED */ 129 } 130 if (!svc_create(sm_prog_1, SM_PROG, SM_VERS, "tcp")) { 131 errx(1, "cannot create udp service."); 132 /* NOTREACHED */ 133 } 134 135 init_file("/var/db/statd.status"); 136 137 /* 138 * Note that it is NOT sensible to run this program from inetd - the 139 * protocol assumes that it will run immediately at boot time. 140 */ 141 if (!debug) 142 daemon(0, 0); 143 pidfile(NULL); 144 openlog("rpc.statd", 0, LOG_DAEMON); 145 if (debug) 146 syslog(LOG_INFO, "Starting - debug enabled"); 147 else 148 syslog(LOG_INFO, "Starting"); 149 150 sa.sa_handler = die; 151 sa.sa_flags = 0; 152 sigemptyset(&sa.sa_mask); 153 (void)sigaction(SIGTERM, &sa, NULL); 154 (void)sigaction(SIGQUIT, &sa, NULL); 155 (void)sigaction(SIGHUP, &sa, NULL); 156 (void)sigaction(SIGINT, &sa, NULL); 157 158 sa.sa_handler = SIG_IGN; 159 sa.sa_flags = SA_RESTART; 160 sigemptyset(&sa.sa_mask); 161 sigaddset(&sa.sa_mask, SIGALRM); 162 163 /* Initialisation now complete - start operating */ 164 165 /* Notify hosts that need it */ 166 notify_handler(0); 167 168 while (1) 169 svc_run(); /* Should never return */ 170 die(0); 171 } 172 173 /* notify_handler ---------------------------------------------------------- */ 174 /* 175 * Purpose: Catch SIGALRM and collect process status 176 * Returns: Nothing. 177 * Notes: No special action required, other than to collect the 178 * process status and hence allow the child to die: 179 * we only use child processes for asynchronous transmission 180 * of SM_NOTIFY to other systems, so it is normal for the 181 * children to exit when they have done their work. 182 */ 183 void 184 notify_handler(sig) 185 int sig; 186 { 187 time_t now; 188 189 NO_ALARM; 190 sa.sa_handler = SIG_IGN; 191 (void)sigaction(SIGALRM, &sa, NULL); 192 193 now = time(NULL); 194 195 (void) walk_db(notify_one, &now); 196 197 if (walk_db(check_work, &now) == 0) { 198 /* 199 * No more work to be done. 200 */ 201 CLR_ALARM; 202 return; 203 } 204 sync_file(); 205 ALARM; 206 alarm(5); 207 } 208 209 /* sync_file --------------------------------------------------------------- */ 210 /* 211 * Purpose: Packaged call of msync() to flush changes to mmap()ed file 212 * Returns: Nothing. Errors to syslog. 213 */ 214 void 215 sync_file() 216 { 217 DBT data; 218 219 data.data = &status_info; 220 data.size = sizeof(status_info); 221 switch ((*db->put)(db, &undefkey, &data, 0)) { 222 case 0: 223 return; 224 case -1: 225 goto bad; 226 default: 227 abort(); 228 } 229 if ((*db->sync)(db, 0) == -1) { 230 bad: 231 syslog(LOG_ERR, "database corrupted %m"); 232 die(1); 233 } 234 } 235 236 /* change_host -------------------------------------------------------------- */ 237 /* 238 * Purpose: Update/Create an entry for host 239 * Returns: Nothing 240 * Notes: 241 * 242 */ 243 void 244 change_host(hostname, hp) 245 char *hostname; 246 HostInfo *hp; 247 { 248 DBT key, data; 249 char *ptr; 250 251 for (ptr = hostname; *ptr; ptr++) 252 if (isupper((unsigned char) *ptr)) 253 *ptr = tolower((unsigned char) *ptr); 254 255 key.data = hostname; 256 key.size = ptr - hostname + 1; 257 data.data = hp; 258 data.size = sizeof(*hp); 259 260 switch ((*db->put)(db, &key, &data, 0)) { 261 case -1: 262 syslog(LOG_ERR, "database corrupted %m"); 263 die(1); 264 case 0: 265 return; 266 default: 267 abort(); 268 } 269 } 270 271 272 /* find_host -------------------------------------------------------------- */ 273 /* 274 * Purpose: Find the entry in the status file for a given host 275 * Returns: Copy of entry in hd, or NULL 276 * Notes: 277 * 278 */ 279 HostInfo * 280 find_host(hostname, hp) 281 char *hostname; 282 HostInfo *hp; 283 { 284 DBT key, data; 285 char *ptr; 286 287 for (ptr = hostname; *ptr; ptr++) 288 if (isupper((unsigned char) *ptr)) 289 *ptr = tolower((unsigned char) *ptr); 290 291 key.data = hostname; 292 key.size = ptr - hostname + 1; 293 switch ((*db->get)(db, &key, &data, 0)) { 294 case 0: 295 if (data.size != sizeof(*hp)) 296 goto bad; 297 return memcpy(hp, data.data, sizeof(*hp)); 298 case 1: 299 return NULL; 300 case -1: 301 goto bad; 302 default: 303 abort(); 304 } 305 306 bad: 307 syslog(LOG_ERR, "Database corrupted %m"); 308 return NULL; 309 } 310 311 /* walk_one ------------------------------------------------------------- */ 312 /* 313 * Purpose: Call the given function if the element is valid 314 * Returns: Nothing - exits on error 315 * Notes: 316 */ 317 static int 318 walk_one(fun, key, data, ptr) 319 int (*fun) __P((DBT *, HostInfo *, void *)); 320 DBT *key, *data; 321 void *ptr; 322 { 323 HostInfo h; 324 if (key->size == undefkey.size && 325 memcmp(key->data, undefkey.data, key->size) == 0) 326 return 0; 327 if (data->size != sizeof(HostInfo)) { 328 syslog(LOG_ERR, "Bad data in database"); 329 die(1); 330 } 331 memcpy(&h, data->data, sizeof(h)); 332 return (*fun)(key, &h, ptr); 333 } 334 335 /* walk_db -------------------------------------------------------------- */ 336 /* 337 * Purpose: Iterate over all elements calling the given function 338 * Returns: -1 if function failed, 0 on success 339 * Notes: 340 */ 341 static int 342 walk_db(fun, ptr) 343 int (*fun) __P((DBT *, HostInfo *, void *)); 344 void *ptr; 345 { 346 DBT key, data; 347 348 switch ((*db->seq)(db, &key, &data, R_FIRST)) { 349 case -1: 350 goto bad; 351 case 1: 352 /* We should have at least the magic entry at this point */ 353 abort(); 354 case 0: 355 if (walk_one(fun, &key, &data, ptr) == -1) 356 return -1; 357 break; 358 default: 359 abort(); 360 } 361 362 363 for (;;) 364 switch ((*db->seq)(db, &key, &data, R_NEXT)) { 365 case -1: 366 goto bad; 367 case 0: 368 if (walk_one(fun, &key, &data, ptr) == -1) 369 return -1; 370 break; 371 case 1: 372 return 0; 373 default: 374 abort(); 375 } 376 bad: 377 syslog(LOG_ERR, "Corrupted database %m"); 378 die(1); 379 } 380 381 /* reset_host ------------------------------------------------------------ */ 382 /* 383 * Purpose: Clean up existing hosts in file. 384 * Returns: Always success 0. 385 * Notes: Clean-up of existing file - monitored hosts will have a 386 * pointer to a list of clients, which refers to memory in 387 * the previous incarnation of the program and so are 388 * meaningless now. These pointers are zeroed and the fact 389 * that the host was previously monitored is recorded by 390 * setting the notifyReqd flag, which will in due course 391 * cause a SM_NOTIFY to be sent. 392 * 393 * Note that if we crash twice in quick succession, some hosts 394 * may already have notifyReqd set, where we didn't manage to 395 * notify them before the second crash occurred. 396 */ 397 static int 398 reset_host(key, hi, ptr) 399 DBT *key; 400 HostInfo *hi; 401 void *ptr; 402 { 403 404 if (hi->monList) { 405 hi->notifyReqd = *(time_t *) ptr; 406 hi->attempts = 0; 407 hi->monList = NULL; 408 } 409 return 0; 410 } 411 412 /* check_work ------------------------------------------------------------ */ 413 /* 414 * Purpose: Check if there is work to be done. 415 * Returns: 0 if there is no work to be done -1 if there is. 416 * Notes: 417 */ 418 static int 419 check_work(key, hi, ptr) 420 DBT *key; 421 HostInfo *hi; 422 void *ptr; 423 { 424 return hi->notifyReqd ? -1 : 0; 425 } 426 427 /* unmon_host ------------------------------------------------------------ */ 428 /* 429 * Purpose: Unmonitor a host 430 * Returns: 0 431 * Notes: 432 */ 433 static int 434 unmon_host(key, hi, ptr) 435 DBT *key; 436 HostInfo *hi; 437 void *ptr; 438 { 439 char *name = key->data; 440 441 if (do_unmon(name, hi, ptr)) 442 change_host(name, hi); 443 return 0; 444 } 445 446 /* notify_one ------------------------------------------------------------ */ 447 /* 448 * Purpose: Notify one host. 449 * Returns: 0 if success -1 on failure 450 * Notes: 451 */ 452 static int 453 notify_one(key, hi, ptr) 454 DBT *key; 455 HostInfo *hi; 456 void *ptr; 457 { 458 time_t now = *(time_t *) ptr; 459 char *name = key->data; 460 DBT data; 461 462 if (hi->notifyReqd == 0 || hi->notifyReqd > now) 463 return 0; 464 465 if (notify_one_host(name)) { 466 give_up: 467 hi->notifyReqd = 0; 468 hi->attempts = 0; 469 data.data = hi; 470 data.size = sizeof(*hi); 471 switch ((*db->put)(db, key, &data, 0)) { 472 case -1: 473 syslog(LOG_ERR, "Error storing %s (%m)", name); 474 case 0: 475 return 0; 476 477 default: 478 abort(); 479 } 480 } 481 else { 482 /* 483 * If one of the initial attempts fails, we wait 484 * for a while and have another go. This is necessary 485 * because when we have crashed, (eg. a power outage) 486 * it is quite possible that we won't be able to 487 * contact all monitored hosts immediately on restart, 488 * either because they crashed too and take longer 489 * to come up (in which case the notification isn't 490 * really required), or more importantly if some 491 * router etc. needed to reach the monitored host 492 * has not come back up yet. In this case, we will 493 * be a bit late in re-establishing locks (after the 494 * grace period) but that is the best we can do. We 495 * try 10 times at 5 sec intervals, 10 more times at 496 * 1 minute intervals, then 24 more times at hourly 497 * intervals, finally giving up altogether if the 498 * host hasn't come back to life after 24 hours. 499 */ 500 if (hi->attempts++ >= 44) 501 goto give_up; 502 else if (hi->attempts < 10) 503 hi->notifyReqd += 5; 504 else if (hi->attempts < 20) 505 hi->notifyReqd += 60; 506 else 507 hi->notifyReqd += 60 * 60; 508 return -1; 509 } 510 } 511 512 /* init_file -------------------------------------------------------------- */ 513 /* 514 * Purpose: Open file, create if necessary, initialise it. 515 * Returns: Nothing - exits on error 516 * Notes: Called before process becomes daemon, hence logs to 517 * stderr rather than syslog. 518 * Opens the file, then mmap()s it for ease of access. 519 * Also performs initial clean-up of the file, zeroing 520 * monitor list pointers, setting the notifyReqd flag in 521 * all hosts that had a monitor list, and incrementing 522 * the state number to the next even value. 523 */ 524 static void 525 init_file(filename) 526 char *filename; 527 { 528 DBT data; 529 530 db = dbopen(filename, O_RDWR|O_CREAT|O_NDELAY|O_EXLOCK, 0644, DB_HASH, 531 NULL); 532 if (db == NULL) 533 err(1, "Cannot open `%s'", filename); 534 535 switch ((*db->get)(db, &undefkey, &data, 0)) { 536 case 1: 537 /* New database */ 538 (void)memset(&status_info, 0, sizeof(status_info)); 539 sync_file(); 540 return; 541 542 case -1: 543 err(1, "error accessing database (%m)"); 544 case 0: 545 /* Existing database */ 546 if (data.size != sizeof(status_info)) 547 errx(1, "database corrupted %lu != %lu", 548 (u_long)data.size, (u_long)sizeof(status_info)); 549 break; 550 default: 551 abort(); 552 } 553 554 reset_database(); 555 return; 556 } 557 558 /* reset_database --------------------------------------------------------- */ 559 /* 560 * Purpose: Clears the statd database 561 * Returns: Nothing 562 * Notes: If this is not called on reset, it will leak memory. 563 */ 564 void 565 reset_database() 566 { 567 time_t now = time(NULL); 568 walk_db(reset_host, &now); 569 570 /* Select the next higher even number for the state counter */ 571 status_info.ourState = 572 (status_info.ourState + 2) & 0xfffffffe; 573 status_info.ourState++; /* XXX - ??? */ 574 sync_file(); 575 } 576 577 /* unmon_hosts --------------------------------------------------------- */ 578 /* 579 * Purpose: Unmonitor all the hosts 580 * Returns: Nothing 581 * Notes: 582 */ 583 void 584 unmon_hosts() 585 { 586 time_t now = time(NULL); 587 walk_db(unmon_host, &now); 588 sync_file(); 589 } 590 591 static int 592 notify_one_host(hostname) 593 char *hostname; 594 { 595 struct timeval timeout = {20, 0}; /* 20 secs timeout */ 596 CLIENT *cli; 597 char dummy; 598 stat_chge arg; 599 char our_hostname[MAXHOSTNAMELEN + 1]; 600 601 gethostname(our_hostname, sizeof(our_hostname)); 602 our_hostname[sizeof(our_hostname) - 1] = '\0'; 603 our_hostname[SM_MAXSTRLEN] = '\0'; 604 arg.mon_name = our_hostname; 605 arg.state = status_info.ourState; 606 607 if (debug) 608 syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s", 609 hostname, our_hostname); 610 611 cli = clnt_create(hostname, SM_PROG, SM_VERS, "udp"); 612 if (!cli) { 613 syslog(LOG_ERR, "Failed to contact host %s%s", hostname, 614 clnt_spcreateerror("")); 615 return (FALSE); 616 } 617 if (clnt_call(cli, SM_NOTIFY, xdr_stat_chge, &arg, xdr_void, 618 &dummy, timeout) != RPC_SUCCESS) { 619 syslog(LOG_ERR, "Failed to contact rpc.statd at host %s", 620 hostname); 621 clnt_destroy(cli); 622 return (FALSE); 623 } 624 clnt_destroy(cli); 625 return (TRUE); 626 } 627 628 629 static void 630 die(n) 631 int n; 632 { 633 (*db->close)(db); 634 exit(n); 635 } 636