1 /* 2 * $Id: srvr_nfs.c,v 5.2 90/06/23 22:20:02 jsp Rel $ 3 * 4 * Copyright (c) 1990 Jan-Simon Pendry 5 * Copyright (c) 1990 Imperial College of Science, Technology & Medicine 6 * Copyright (c) 1990 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Jan-Simon Pendry at Imperial College, London. 11 * 12 * %sccs.include.redist.c% 13 * 14 * @(#)srvr_nfs.c 5.1 (Berkeley) 06/29/90 15 */ 16 17 /* 18 * NFS server modeling 19 */ 20 21 #include "am.h" 22 #include <netdb.h> 23 #include <rpc/pmap_prot.h> 24 #include "mount.h" 25 26 extern qelem nfs_srvr_list; 27 qelem nfs_srvr_list = { &nfs_srvr_list, &nfs_srvr_list }; 28 29 typedef struct nfs_private { 30 u_short np_mountd; /* Mount daemon port number */ 31 char np_mountd_inval; /* Port may be invalid */ 32 int np_ping; /* Number of failed ping attempts */ 33 time_t np_ttl; /* Time when server is thought dead */ 34 int np_xid; /* RPC transaction id for pings */ 35 int np_error; /* Error during portmap request */ 36 } nfs_private; 37 38 static int np_xid; /* For NFS pings */ 39 #define NPXID_ALLOC() (++np_xid) 40 /*#define NPXID_ALLOC() ((++np_xid&0x0fffffff) == 0 ? npxid_gc() : np_xid)*/ 41 42 /* 43 * Number of pings allowed to fail before host is declared down 44 * - three-fifths of the allowed mount time... 45 #define MAX_ALLOWED_PINGS ((((ALLOWED_MOUNT_TIME + 5 * AM_PINGER - 1) * 3) / 5) / AM_PINGER) 46 */ 47 #define MAX_ALLOWED_PINGS (3 + /* for luck ... */ 1) 48 49 /* 50 * How often to ping when starting a new server 51 */ 52 #define FAST_NFS_PING 3 53 54 #if (FAST_NFS_PING * MAX_ALLOWED_PINGS) >= ALLOWED_MOUNT_TIME 55 #error: sanity check failed 56 /* 57 you cannot do things this way... 58 sufficient fast pings must be given the chance to fail 59 within the allowed mount time 60 */ 61 #endif /* (FAST_NFS_PING * MAX_ALLOWED_PINGS) >= ALLOWED_MOUNT_TIME */ 62 63 static int ping_len; 64 static char ping_buf[sizeof(struct rpc_msg) + 32]; 65 66 /* 67 * Startup the NFS ping 68 */ 69 static void start_ping() 70 { 71 XDR ping_xdr; 72 struct rpc_msg ping_msg; 73 74 rpc_msg_init(&ping_msg, NFS_PROGRAM, NFS_VERSION, NFSPROC_NULL); 75 76 /* 77 * Create an XDR endpoint 78 */ 79 xdrmem_create(&ping_xdr, ping_buf, sizeof(ping_buf), XDR_ENCODE); 80 81 /* 82 * Create the NFS ping message 83 */ 84 if (!xdr_callmsg(&ping_xdr, &ping_msg)) { 85 plog(XLOG_ERROR, "Couldn't create ping RPC message"); 86 going_down(3); 87 } 88 89 /* 90 * Find out how long it is 91 */ 92 ping_len = xdr_getpos(&ping_xdr); 93 94 /* 95 * Destroy the XDR endpoint - we don't need it anymore 96 */ 97 xdr_destroy(&ping_xdr); 98 } 99 100 101 /* 102 * Called when a portmap reply arrives 103 */ 104 static void got_portmap(pkt, len, sa, ia, idv, done) 105 voidp pkt; 106 int len; 107 struct sockaddr_in *sa, *ia; 108 voidp idv; 109 int done; 110 { 111 fserver *fs2 = (fserver *) idv; 112 fserver *fs = 0; 113 ITER(fs, fserver, &nfs_srvr_list) 114 if (fs == fs2) 115 break; 116 117 if (fs == fs2) { 118 u_long port = 0; /* XXX - should be short but protocol is naff */ 119 int error = done ? pickup_rpc_reply(pkt, len, (voidp) &port, xdr_u_long) : -1; 120 nfs_private *np = (nfs_private *) fs->fs_private; 121 if (!error && port) { 122 #ifdef DEBUG 123 dlog("got port (%d) for mountd on %s", port, fs->fs_host); 124 #endif /* DEBUG */ 125 /* 126 * Grab the port number. Portmap sends back 127 * an unsigned long in native ordering, so it 128 * needs converting to a unsigned short in 129 * network ordering. 130 */ 131 np->np_mountd = htons((u_short) port); 132 np->np_mountd_inval = FALSE; 133 np->np_error = 0; 134 } else { 135 #ifdef DEBUG 136 dlog("Error fetching port for mountd on %s", fs->fs_host); 137 #endif /* DEBUG */ 138 /* 139 * Almost certainly no mountd running on remote host 140 */ 141 np->np_error = error ? error : ETIMEDOUT; 142 } 143 if (fs->fs_flags & FSF_WANT) 144 wakeup_srvr(fs); 145 } else if (done) { 146 #ifdef DEBUG 147 dlog("Got portmap for old port request"); 148 #endif /* DEBUG */ 149 } else { 150 #ifdef DEBUG 151 dlog("portmap request timed out"); 152 #endif /* DEBUG */ 153 } 154 } 155 156 /* 157 * Obtain portmap information 158 */ 159 static int call_portmap(fs, auth, prog, vers, prot) 160 fserver *fs; 161 AUTH *auth; 162 unsigned long prog, vers, prot; 163 { 164 struct rpc_msg pmap_msg; 165 int len; 166 char iobuf[UDPMSGSIZE]; 167 int error; 168 struct pmap pmap; 169 170 rpc_msg_init(&pmap_msg, PMAPPROG, PMAPVERS, (unsigned long) 0); 171 pmap.pm_prog = prog; 172 pmap.pm_vers = vers; 173 pmap.pm_prot = prot; 174 pmap.pm_port = 0; 175 len = make_rpc_packet(iobuf, sizeof(iobuf), PMAPPROC_GETPORT, 176 &pmap_msg, (voidp) &pmap, xdr_pmap, auth); 177 if (len > 0) { 178 struct sockaddr_in sin; 179 bzero((voidp) &sin, sizeof(sin)); 180 sin = *fs->fs_ip; 181 sin.sin_port = htons(PMAPPORT); 182 error = fwd_packet(RPC_XID_PORTMAP, (voidp) iobuf, len, 183 &sin, &sin, (voidp) fs, got_portmap); 184 } else { 185 error = -len; 186 } 187 return error; 188 } 189 190 static void nfs_keepalive P((fserver*)); 191 192 static void recompute_portmap P((fserver *fs)); 193 static void recompute_portmap(fs) 194 fserver *fs; 195 { 196 if (!nfs_auth) 197 nfs_auth = authunix_create_default(); 198 if (!nfs_auth) { 199 nfs_private *np = (nfs_private *) fs->fs_private; 200 np->np_error = ENOBUFS; 201 } else { 202 call_portmap(fs, nfs_auth, MOUNTPROG, 203 MOUNTVERS, (unsigned long) IPPROTO_UDP); 204 } 205 } 206 207 /* 208 * This is called when we get a reply to an RPC ping. 209 * The value of id was taken from the nfs_private 210 * structure when the ping was transmitted. 211 */ 212 /*ARGSUSED*/ 213 static void nfs_pinged(pkt, len, sp, tsp, idv, done) 214 voidp pkt; 215 int len; 216 struct sockaddr_in *sp, *tsp; 217 voidp idv; 218 int done; 219 { 220 int xid = (int) idv; 221 fserver *fs; 222 int found_map = 0; 223 224 if (!done) 225 return; 226 227 /* 228 * For each node... 229 */ 230 ITER(fs, fserver, &nfs_srvr_list) { 231 nfs_private *np = (nfs_private *) fs->fs_private; 232 if (np->np_xid == xid) { 233 /* 234 * Reset the ping counter. 235 * Update the keepalive timer. 236 * Log what happened. 237 */ 238 if (fs->fs_flags & FSF_DOWN) { 239 fs->fs_flags &= ~FSF_DOWN; 240 if (fs->fs_flags & FSF_VALID) { 241 srvrlog(fs, "is up"); 242 } else { 243 srvrlog(fs, "ok"); 244 fs->fs_flags |= FSF_VALID; 245 } 246 247 #ifdef notdef 248 /* why ??? */ 249 if (fs->fs_flags & FSF_WANT) 250 wakeup_srvr(fs); 251 #endif /* notdef */ 252 } else { 253 if (fs->fs_flags & FSF_VALID) { 254 #ifdef DEBUG 255 dlog("file server %s type nfs is still up", fs->fs_host); 256 #endif /* DEBUG */ 257 } else { 258 srvrlog(fs, "ok"); 259 fs->fs_flags |= FSF_VALID; 260 } 261 } 262 263 /* 264 * Adjust ping interval 265 */ 266 untimeout(fs->fs_cid); 267 fs->fs_cid = timeout(fs->fs_pinger, nfs_keepalive, (voidp) fs); 268 269 /* 270 * Update ttl for this server 271 */ 272 np->np_ttl = clocktime() + 273 (MAX_ALLOWED_PINGS - 1) * FAST_NFS_PING + fs->fs_pinger - 1; 274 275 /* 276 * New RPC xid... 277 */ 278 np->np_xid = NPXID_ALLOC(); 279 280 /* 281 * Failed pings is zero... 282 */ 283 np->np_ping = 0; 284 285 /* 286 * Recompute portmap information if not known 287 */ 288 if (np->np_mountd_inval) 289 recompute_portmap(fs); 290 291 found_map++; 292 break; 293 } 294 } 295 296 #ifdef DEBUG 297 if (found_map == 0) 298 dlog("Spurious ping packet"); 299 #endif /* DEBUG */ 300 } 301 302 /* 303 * Called when no ping-reply received 304 */ 305 static void nfs_timed_out P((fserver *fs)); 306 static void nfs_timed_out(fs) 307 fserver *fs; 308 { 309 nfs_private *np = (nfs_private *) fs->fs_private; 310 311 /* 312 * Not known to be up any longer 313 */ 314 if (FSRV_ISUP(fs)) { 315 fs->fs_flags &= ~FSF_VALID; 316 srvrlog(fs, "not responding"); 317 } 318 319 /* 320 * Another ping has failed 321 */ 322 np->np_ping++; 323 324 /* 325 * If ttl has expired then guess that it is dead 326 */ 327 if (np->np_ttl < clocktime()) { 328 if ((fs->fs_flags & FSF_DOWN) == 0) { 329 /* 330 * Server was up, but is now down. 331 */ 332 srvrlog(fs, "is down"); 333 fs->fs_flags |= FSF_DOWN|FSF_VALID; 334 if (fs->fs_flags & FSF_WANT) 335 wakeup_srvr(fs); 336 /* 337 * Since the server is down, the portmap 338 * information may now be wrong, so it 339 * must be flushed from the local cache 340 */ 341 flush_nfs_fhandle_cache(fs); 342 np->np_error = -1; 343 /* 344 * Pretend just one ping has failed now 345 */ 346 np->np_ping = 1; 347 } else { 348 /* 349 * Known to be down 350 */ 351 fs->fs_flags |= FSF_VALID; 352 } 353 } else { 354 #ifdef DEBUG 355 if (np->np_ping > 1) 356 dlog("%d pings to %s failed - at most %d allowed", np->np_ping, fs->fs_host, MAX_ALLOWED_PINGS); 357 #endif /* DEBUG */ 358 } 359 360 /* 361 * Run keepalive again 362 */ 363 nfs_keepalive(fs); 364 } 365 366 /* 367 * Keep track of whether a server is alive 368 */ 369 static void nfs_keepalive P((fserver *fs)); 370 static void nfs_keepalive(fs) 371 fserver *fs; 372 { 373 int error; 374 nfs_private *np = (nfs_private *) fs->fs_private; 375 int fstimeo = -1; 376 377 /* 378 * Send an NFS ping to this node 379 */ 380 381 if (ping_len == 0) 382 start_ping(); 383 384 /* 385 * Queue the packet... 386 */ 387 error = fwd_packet(MK_RPC_XID(RPC_XID_NFSPING, np->np_xid), (voidp) ping_buf, 388 ping_len, fs->fs_ip, (struct sockaddr_in *) 0, (voidp) np->np_xid, nfs_pinged); 389 390 /* 391 * See if a hard error occured 392 */ 393 switch (error) { 394 case ENETDOWN: 395 case ENETUNREACH: 396 case EHOSTDOWN: 397 case EHOSTUNREACH: 398 np->np_ping = MAX_ALLOWED_PINGS; /* immediately down */ 399 np->np_ttl = (time_t) 0; 400 /* 401 * This causes an immediate call to nfs_timed_out 402 * whenever the server was thought to be up. 403 * See +++ below. 404 */ 405 fstimeo = 0; 406 break; 407 408 case 0: 409 #ifdef DEBUG 410 dlog("Sent NFS ping to %s", fs->fs_host); 411 #endif /* DEBUG */ 412 break; 413 } 414 415 #ifdef DEBUG 416 /*dlog("keepalive, ping = %d", np->np_ping);*/ 417 #endif /* DEBUG */ 418 419 /* 420 * Back off the ping interval if we are not getting replies and 421 * the remote system is know to be down. 422 */ 423 switch (fs->fs_flags & (FSF_DOWN|FSF_VALID)) { 424 case FSF_VALID: /* Up */ 425 if (fstimeo < 0) /* +++ see above */ 426 fstimeo = FAST_NFS_PING; 427 break; 428 429 case FSF_VALID|FSF_DOWN: /* Down */ 430 fstimeo = fs->fs_pinger; 431 break; 432 433 default: /* Unknown */ 434 fstimeo = FAST_NFS_PING; 435 break; 436 } 437 438 #ifdef DEBUG 439 dlog("NFS timeout in %d seconds", fstimeo); 440 #endif /* DEBUG */ 441 442 fs->fs_cid = timeout(fstimeo, nfs_timed_out, (voidp) fs); 443 } 444 445 int nfs_srvr_port(fs, port, wchan) 446 fserver *fs; 447 u_short *port; 448 voidp wchan; 449 { 450 int error = -1; 451 if ((fs->fs_flags & FSF_VALID) == FSF_VALID) { 452 if ((fs->fs_flags & FSF_DOWN) == 0) { 453 nfs_private *np = (nfs_private *) fs->fs_private; 454 if (np->np_error == 0) { 455 *port = np->np_mountd; 456 /* 457 * Now go get it again in case it changed 458 */ 459 np->np_mountd_inval = TRUE; 460 error = 0; 461 } else { 462 if (np->np_error < 0) 463 recompute_portmap(fs); 464 error = np->np_error; 465 } 466 } else { 467 error = EWOULDBLOCK; 468 } 469 } 470 if (error < 0 && wchan && !(fs->fs_flags & FSF_WANT)) { 471 /* 472 * If a wait channel is supplied, and no 473 * error has yet occured, then arrange 474 * that a wakeup is done on the wait channel, 475 * whenever a wakeup is done on this fs node. 476 * Wakeup's are done on the fs node whenever 477 * it changes state - thus causing control to 478 * come back here and new, better things to happen. 479 */ 480 fs->fs_flags |= FSF_WANT; 481 sched_task(wakeup_task, wchan, (voidp) fs); 482 } 483 return error; 484 } 485 486 static void start_nfs_pings P((fserver *fs, int pingval)); 487 static void start_nfs_pings(fs, pingval) 488 fserver *fs; 489 int pingval; 490 { 491 if (!(fs->fs_flags & FSF_PINGING)) { 492 fs->fs_flags |= FSF_PINGING; 493 if (fs->fs_cid) 494 untimeout(fs->fs_cid); 495 if (pingval < 0) { 496 srvrlog(fs, "wired up"); 497 fs->fs_flags |= FSF_VALID; 498 fs->fs_flags &= ~FSF_DOWN; 499 } else { 500 nfs_keepalive(fs); 501 } 502 } else { 503 #ifdef DEBUG 504 dlog("Already running pings to %s", fs->fs_host); 505 #endif /* DEBUG */ 506 } 507 } 508 509 /* 510 * Find an nfs server for a host. 511 */ 512 fserver *find_nfs_srvr P((mntfs *mf)); 513 fserver *find_nfs_srvr(mf) 514 mntfs *mf; 515 { 516 fserver *fs; 517 struct hostent *hp = 0; 518 char *host = mf->mf_fo->opt_rhost; 519 struct sockaddr_in *ip; 520 nfs_private *np; 521 int pingval; 522 523 /* 524 * Get ping interval from mount options. 525 * Current only used to decide whether pings 526 * are required or not. < 0 = no pings. 527 */ 528 { struct mntent mnt; 529 mnt.mnt_opts = mf->mf_fo->opt_opts; 530 pingval = hasmntval(&mnt, "ping"); 531 #ifdef HAS_TCP_NFS 532 /* 533 * Over TCP mount, don't bother to do pings. 534 * This is experimental - maybe you want to 535 * do pings anyway... 536 */ 537 if (pingval == 0 && hasmntopt(&mnt, "tcp")) 538 pingval = -1; 539 #endif /* HAS_TCP_NFS */ 540 } 541 542 543 top: 544 /* 545 * Scan the list of known servers looking 546 * for one with the same name 547 */ 548 ITER(fs, fserver, &nfs_srvr_list) { 549 if (STREQ(host, fs->fs_host)) { 550 start_nfs_pings(fs, pingval); 551 fs->fs_refc++; 552 return fs; 553 } 554 } 555 556 /* 557 * If the name is not known, it may be 558 * because it was an alternate name for 559 * the same machine. So do a lookup and 560 * try again with the primary name if that 561 * is different. 562 * All that assuming it wasn't normalized 563 * earlier of course... 564 */ 565 if (hp == 0) { 566 hp = gethostbyname(host); 567 if (hp && !STREQ(host, hp->h_name) && !normalize_hosts) { 568 host = hp->h_name; 569 goto top; 570 } 571 } 572 573 /* 574 * Get here if we can't find an entry 575 */ 576 if (hp) { 577 switch (hp->h_addrtype) { 578 case AF_INET: 579 ip = ALLOC(sockaddr_in); 580 bzero((voidp) ip, sizeof(*ip)); 581 ip->sin_family = AF_INET; 582 ip->sin_addr = *(struct in_addr *) hp->h_addr; 583 ip->sin_port = htons(NFS_PORT); 584 break; 585 586 default: 587 ip = 0; 588 break; 589 } 590 } else { 591 ip = 0; 592 } 593 594 /* 595 * Allocate a new server 596 */ 597 fs = ALLOC(fserver); 598 fs->fs_refc = 1; 599 fs->fs_host = strdup(hp ? hp->h_name : "unknown_hostname"); 600 host_normalize(&fs->fs_host); 601 fs->fs_ip = ip; 602 fs->fs_cid = 0; 603 if (ip) { 604 fs->fs_flags = FSF_DOWN; /* Starts off down */ 605 } else { 606 fs->fs_flags = FSF_ERROR|FSF_VALID; 607 mf->mf_flags |= MFF_ERROR; 608 mf->mf_error = ENOENT; 609 } 610 fs->fs_type = "nfs"; 611 fs->fs_pinger = AM_PINGER; 612 np = ALLOC(nfs_private); 613 bzero((voidp) np, sizeof(*np)); 614 np->np_mountd_inval = TRUE; 615 np->np_xid = NPXID_ALLOC(); 616 np->np_error = -1; 617 /* 618 * Initially the server will be deemed dead after 619 * MAX_ALLOWED_PINGS of the fast variety have failed. 620 */ 621 np->np_ttl = clocktime() + MAX_ALLOWED_PINGS * FAST_NFS_PING - 1; 622 fs->fs_private = (voidp) np; 623 fs->fs_prfree = (void (*)()) free; 624 625 if (!(fs->fs_flags & FSF_ERROR)) { 626 /* 627 * Start of keepalive timer 628 */ 629 start_nfs_pings(fs, pingval); 630 } 631 632 /* 633 * Add to list of servers 634 */ 635 ins_que(&fs->fs_q, &nfs_srvr_list); 636 637 return fs; 638 } 639