/*
 * Copyright (c) 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)nfs_socket.c	8.5 (Berkeley) 03/30/95
 */

/*
 * Socket operations for use by nfs
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/vnode.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>

#include <netinet/in.h>
#include <netinet/tcp.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/xdr_subs.h>
#include <nfs/nfsm_subs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfsrtt.h>
#include <nfs/nqnfs.h>

#define	TRUE	1
#define	FALSE	0

/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer estimates would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
/*
 * External data, mostly RPC constants in XDR form
 */
extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
	rpc_msgaccepted, rpc_call, rpc_autherr,
	rpc_auth_kerb;
extern u_long nfs_prog, nqnfs_prog;
extern time_t nqnfsstarttime;
extern struct nfsstats nfsstats;
extern int nfsv3_procid[NFS_NPROCS];
extern int nfs_ticks;

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
	0, 0, 0,
};
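/*
 * Illustrative sketch (not part of the original source): how NFS_RTO
 * reduces to the A+2D and A+4D estimates quoted above.  nm_srtt[] is
 * kept scaled by 8 (gain 0.125) and nm_sdrtt[] by 4 (gain 0.25), so
 * with srtt == 8A and sdrtt == 4D the getattr/lookup case is
 *	(((srtt + 3) >> 2) + sdrtt + 1) >> 1  ==  (2A + 4D) >> 1  ==  A + 2D
 * and the read/write case is
 *	((srtt + 7) >> 3) + sdrtt + 1  ==  A + 4D.
 */
#ifdef notdef
static int
nfs_rto_example(nmp)
	register struct nfsmount *nmp;
{

	/* Timer class 1 (getattr): expands to roughly A + 2D ticks. */
	return (NFS_RTO(nmp, 1));
}
#endif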
/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion Avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
int	nfs_sbwait();
void	nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
void	nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
struct mbuf *nfsm_rpchead();
int nfsrtton = 0;
struct nfsrtt nfsrtt;

int	nfsrv_null(),
	nfsrv_getattr(),
	nfsrv_setattr(),
	nfsrv_lookup(),
	nfsrv3_access(),
	nfsrv_readlink(),
	nfsrv_read(),
	nfsrv_write(),
	nfsrv_create(),
	nfsrv_mknod(),
	nfsrv_remove(),
	nfsrv_rename(),
	nfsrv_link(),
	nfsrv_symlink(),
	nfsrv_mkdir(),
	nfsrv_rmdir(),
	nfsrv_readdir(),
	nfsrv_readdirplus(),
	nfsrv_statfs(),
	nfsrv_fsinfo(),
	nfsrv_pathconf(),
	nfsrv_commit(),
	nfsrv_noop(),
	nqnfsrv_getlease(),
	nqnfsrv_vacated();

int (*nfsrv3_procs[NFS_NPROCS])() = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_lookup,
	nfsrv3_access,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_write,
	nfsrv_create,
	nfsrv_mkdir,
	nfsrv_symlink,
	nfsrv_mknod,
	nfsrv_remove,
	nfsrv_rmdir,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_readdir,
	nfsrv_readdirplus,
	nfsrv_statfs,
	nfsrv_fsinfo,
	nfsrv_pathconf,
	nfsrv_commit,
	nqnfsrv_getlease,
	nqnfsrv_vacated,
	nfsrv_noop,
	nfsrv_noop
};
/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 */
int
nfs_connect(nmp, rep)
	register struct nfsmount *nmp;
	struct nfsreq *rep;
{
	register struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
	struct mbuf *m;
	u_short tport;

	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
		nmp->nm_soproto);
	if (error)
		goto bad;
	so = nmp->nm_so;
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		MGET(m, M_WAIT, MT_SONAME);
		sin = mtod(m, struct sockaddr_in *);
		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin->sin_port = htons(tport);
		while ((error = sobind(so, m)) == EADDRINUSE &&
		       --tport > IPPORT_RESERVED / 2)
			sin->sin_port = htons(tport);
		m_freem(m);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		error = soconnect(so, nmp->nm_nam);
		if (error)
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
				"nfscon", 2 * hz);
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_rcv.sb_timeo = 0;
		so->so_snd.sb_timeo = 0;
	}
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
			* 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
			* 2;
	}
	error = soreserve(so, sndreserve, rcvreserve);
	if (error)
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
		nmp->nm_srtt[4] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;	    /* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}

/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 */
int
nfs_reconnect(rep)
	register struct nfsreq *rep;
{
	register struct nfsreq *rp;
	register struct nfsmount *nmp = rep->r_nmp;
	int error;

	nfs_disconnect(nmp);
	while ((error = nfs_connect(nmp, rep))) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
	}

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	for (rp = nfs_reqq.tqh_first; rp != 0; rp = rp->r_chain.tqe_next) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
	}
	return (0);
}
/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(nmp)
	register struct nfsmount *nmp;
{
	register struct socket *so;

	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = (struct socket *)0;
		soshutdown(so, 2);
		soclose(so);
	}
}

/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_sndlock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 * For the client side:
 * - return EINTR if the RPC is terminated, 0 otherwise
 * - set R_MUSTRESEND if the send fails for any reason
 * - do any cleanup required by recoverable socket errors (???)
 * For the server side:
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
int
nfs_send(so, nam, top, rep)
	register struct socket *so;
	struct mbuf *nam;
	register struct mbuf *top;
	struct nfsreq *rep;
{
	struct mbuf *sendnam;
	int error, soflags, flags;

	if (rep) {
		if (rep->r_flags & R_SOFTTERM) {
			m_freem(top);
			return (EINTR);
		}
		if ((so = rep->r_nmp->nm_so) == NULL) {
			rep->r_flags |= R_MUSTRESEND;
			m_freem(top);
			return (0);
		}
		rep->r_flags &= ~R_MUSTRESEND;
		soflags = rep->r_nmp->nm_soflags;
	} else
		soflags = so->so_proto->pr_flags;
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = (struct mbuf *)0;
	else
		sendnam = nam;
	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

	error = sosend(so, sendnam, (struct uio *)0, top,
		(struct mbuf *)0, flags);
	if (error) {
		if (rep) {
			log(LOG_INFO, "nfs send error %d for server %s\n", error,
			    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			/*
			 * Deal with errors for the client side.
			 */
			if (rep->r_flags & R_SOFTTERM)
				error = EINTR;
			else
				rep->r_flags |= R_MUSTRESEND;
		} else
			log(LOG_INFO, "nfsd send error %d\n", error);

		/*
		 * Handle any recoverable (soft) socket errors here. (???)
		 */
		if (error != EINTR && error != ERESTART &&
		    error != EWOULDBLOCK && error != EPIPE)
			error = 0;
	}
	return (error);
}
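/*
 * Illustrative sketch (not part of the original source): the Sun RPC
 * record mark handled by nfs_receive() below and prepended by
 * nfs_request() is one big-endian 32 bit word; the high bit marks the
 * last fragment and the low 31 bits carry the fragment length.
 */
#ifdef notdef
static u_long
nfs_recmark_encode(len, lastfrag)
	u_long len;
	int lastfrag;
{

	return (htonl((lastfrag ? 0x80000000 : 0) | (len & ~0x80000000)));
}

static u_long
nfs_recmark_decode(mark, lastfrag)
	u_long mark;
	int *lastfrag;
{

	mark = ntohl(mark);
	*lastfrag = (mark & 0x80000000) != 0;
	return (mark & ~0x80000000);
}
#endif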
/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 *     small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 */
int
nfs_receive(rep, aname, mp)
	register struct nfsreq *rep;
	struct mbuf **aname;
	struct mbuf **mp;
{
	register struct socket *so;
	struct uio auio;
	struct iovec aio;
	register struct mbuf *m;
	struct mbuf *control;
	u_long len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = (struct mbuf *)0;
	*aname = (struct mbuf *)0;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
		if (error)
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(&rep->r_nmp->nm_flag);
			return (EINTR);
		}
		so = rep->r_nmp->nm_so;
		if (!so) {
			error = nfs_reconnect(rep);
			if (error) {
				nfs_sndunlock(&rep->r_nmp->nm_flag);
				return (error);
			}
			goto tryagain;
		}
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
			if (error) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep))) {
					nfs_sndunlock(&rep->r_nmp->nm_flag);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(&rep->r_nmp->nm_flag);
		if (sotype == SOCK_STREAM) {
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_long);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_long);
			auio.uio_procp = p;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0, &auio,
				    (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%d/%d) from nfs server %s\n",
				    sizeof(u_long) - auio.uio_resid,
				    sizeof(u_long),
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, (struct mbuf **)0, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%d/%d) from nfs server %s\n",
				    len - auio.uio_resid, len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000;   /* Anything Big */
			auio.uio_procp = p;
			do {
				rcvflg = 0;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
				 (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)0;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
			if (!error)
				error = nfs_reconnect(rep);
			if (!error)
				goto tryagain;
		}
	} else {
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = (struct mbuf **)0;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		auio.uio_procp = p;
		do {
			rcvflg = 0;
			error = soreceive(so, getnam, &auio, mp,
			    (struct mbuf **)0, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
	}
	if (error) {
		m_freem(*mp);
		*mp = (struct mbuf *)0;
	}
	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long
	 * or with m_data not longword aligned.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	nfs_realign(*mp, 5 * NFSX_UNSIGNED);
	return (error);
}
/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
int
nfs_reply(myrep)
	struct nfsreq *myrep;
{
	register struct nfsreq *rep;
	register struct nfsmount *nmp = myrep->r_nmp;
	register long t1;
	struct mbuf *mrep, *nam, *md;
	u_long rxid, *tl;
	caddr_t dpos, cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		error = nfs_rcvlock(myrep);
		if (error)
			return (error);
		/* Already received, bye bye */
		if (myrep->r_mrep != NULL) {
			nfs_rcvunlock(&nmp->nm_flag);
			return (0);
		}
		/*
		 * Get the next Rpc reply off the socket
		 */
		error = nfs_receive(myrep, &nam, &mrep);
		nfs_rcvunlock(&nmp->nm_flag);
		if (error) {

			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
				if (myrep->r_flags & R_GETONEREP)
					return (0);
				continue;
			}
			return (error);
		}
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, caddr_t);
		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			if (nmp->nm_flag & NFSMNT_NQNFS) {
				if (nqnfs_callback(nmp, mrep, md, dpos))
					nfsstats.rpcinvalid++;
			} else {
				nfsstats.rpcinvalid++;
				m_freem(mrep);
			}
nfsmout:
			if (myrep->r_flags & R_GETONEREP)
				return (0);
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		for (rep = nfs_reqq.tqh_first; rep != 0;
		    rep = rep->r_chain.tqe_next) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
					rt->tstamp = time;
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
					    (NFS_CWNDSCALE * NFS_CWNDSCALE +
					    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				rep->r_flags &= ~R_SENT;
				nmp->nm_sent -= NFS_CWNDSCALE;
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so coarse, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
				nmp->nm_timeouts = 0;
				break;
			}
		}
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == 0) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
		if (myrep->r_flags & R_GETONEREP)
			return (0);
	}
}
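/*
 * Illustrative sketch (not part of the original source): the congestion
 * window arithmetic used in nfs_reply() above and nfs_timer() below is
 * the usual AIMD rule, scaled by NFS_CWNDSCALE to avoid fractions.
 */
#ifdef notdef
static void
nfs_cwnd_update(nmp, timedout)
	register struct nfsmount *nmp;
	int timedout;
{

	if (timedout) {
		/* Multiplicative decrease: halve, but never below one rpc. */
		nmp->nm_cwnd >>= 1;
		if (nmp->nm_cwnd < NFS_CWNDSCALE)
			nmp->nm_cwnd = NFS_CWNDSCALE;
	} else if (nmp->nm_cwnd <= nmp->nm_sent) {
		/* Additive increase: about one rpc per window of replies. */
		nmp->nm_cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE +
		    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
		if (nmp->nm_cwnd > NFS_MAXCWND)
			nmp->nm_cwnd = NFS_MAXCWND;
	}
}
#endif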
/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
int
nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
	struct vnode *vp;
	struct mbuf *mrest;
	int procnum;
	struct proc *procp;
	struct ucred *cred;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
{
	register struct mbuf *m, *mrep;
	register struct nfsreq *rep;
	register u_long *tl;
	register int i;
	struct nfsmount *nmp;
	struct mbuf *md, *mheadend;
	struct nfsnode *np;
	char nickv[RPCX_NICKVERF];
	time_t reqtime, waituntil;
	caddr_t dpos, cp2;
	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
	int verf_len, verf_type;
	u_long xid;
	u_quad_t frev;
	char *auth_str, *verf_str;
	NFSKERBKEY_T key;		/* save session key */

	nmp = VFSTONFS(vp->v_mount);
	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	rep->r_procnum = procnum;
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	verf_str = auth_str = (char *)0;
	if (nmp->nm_flag & NFSMNT_KERB) {
		verf_str = nickv;
		verf_len = sizeof (nickv);
		auth_type = RPCAUTH_KERB4;
		bzero((caddr_t)key, sizeof (key));
		if (failed_auth || nfs_getnickauth(nmp, cred, &auth_str,
			&auth_len, verf_str, verf_len)) {
			error = nfs_getauth(nmp, rep, cred, &auth_str,
				&auth_len, verf_str, &verf_len, key);
			if (error) {
				free((caddr_t)rep, M_NFSREQ);
				m_freem(mrest);
				return (error);
			}
		}
	} else {
		auth_type = RPCAUTH_UNIX;
		if (cred->cr_ngroups < 1)
			panic("nfsreq nogrps");
		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len,
	     auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_long *) = htonl(0x80000000 |
			(m->m_pkthdr.len - NFSX_UNSIGNED));
	}
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftclock();
	TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);

	/* Get send time for nqnfs */
	reqtime = time.tv_sec;

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(&nmp->nm_flag, rep);
		if (!error) {
			m = m_copym(m, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(&nmp->nm_flag);
		}
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			nmp->nm_sent += NFS_CWNDSCALE;
			rep->r_flags |= R_SENT;
		}
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error || error == EPIPE)
		error = nfs_reply(rep);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftclock();
	TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
	splx(s);

	/*
	 * Decrement the outstanding request count.
	 */
	if (rep->r_flags & R_SENT) {
		rep->r_flags &= ~R_SENT;	/* paranoia */
		nmp->nm_sent -= NFS_CWNDSCALE;
	}

	/*
	 * If there was a successful reply and a tprintf msg has been
	 * printed, tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error) {
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (!failed_auth) {
				failed_auth++;
				mheadend->m_next = (struct mbuf *)0;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * Grab any Kerberos verifier, otherwise just throw it away.
	 */
	verf_type = fxdr_unsigned(int, *tl++);
	i = fxdr_unsigned(int, *tl);
	if ((nmp->nm_flag & NFSMNT_KERB) && verf_type == RPCAUTH_KERB4) {
		error = nfs_savenickauth(nmp, cred, i, key, &md, &dpos, mrep);
		if (error)
			goto nfsmout;
	} else if (i > 0)
		nfsm_adv(nfsm_rndup(i));
	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			if ((nmp->nm_flag & NFSMNT_NFSV3) &&
				error == NFSERR_TRYLATER) {
				m_freem(mrep);
				error = 0;
				waituntil = time.tv_sec + trylater_delay;
				while (time.tv_sec < waituntil)
					(void) tsleep((caddr_t)&lbolt,
						PSOCK, "nqnfstry", 0);
				trylater_delay *= nfs_backoff[trylater_cnt];
				if (trylater_cnt < 7)
					trylater_cnt++;
				goto tryagain;
			}
			/*
			 * If the File Handle was stale, invalidate the
			 * lookup cache, just in case.
			 */
			if (error == ESTALE)
				cache_purge(vp);
			if (nmp->nm_flag & NFSMNT_NFSV3) {
				*mrp = mrep;
				*mdp = md;
				*dposp = dpos;
				error |= NFSERR_RETERR;
			} else
				m_freem(mrep);
			m_freem(rep->r_mreq);
			free((caddr_t)rep, M_NFSREQ);
			return (error);
		}

		/*
		 * For nqnfs, get any lease in reply
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			if (*tl) {
				np = VTONFS(vp);
				nqlflag = fxdr_unsigned(int, *tl);
				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
				cachable = fxdr_unsigned(int, *tl++);
				reqtime += fxdr_unsigned(int, *tl++);
				if (reqtime > time.tv_sec) {
					fxdr_hyper(tl, &frev);
					nqnfs_clientlease(nmp, np, nqlflag,
						cachable, reqtime, frev);
				}
			}
		}
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		m_freem(rep->r_mreq);
		FREE((caddr_t)rep, M_NFSREQ);
		return (0);
	}
	m_freem(mrep);
	error = EPROTONOSUPPORT;
nfsmout:
	m_freem(rep->r_mreq);
	free((caddr_t)rep, M_NFSREQ);
	return (error);
}

/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
int
nfs_rephead(siz, nd, slp, err, cache, frev, mrq, mbp, bposp)
	int siz;
	struct nfsrv_descript *nd;
	struct nfssvc_sock *slp;
	int err;
	int cache;
	u_quad_t *frev;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	register u_long *tl;
	register struct mbuf *mreq;
	caddr_t bpos;
	struct mbuf *mb, *mb2;

	MGETHDR(mreq, M_WAIT, MT_DATA);
	mb = mreq;
	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= MINCLSIZE) {
		MCLGET(mreq, M_WAIT);
	} else
		mreq->m_data += max_hdr;
	tl = mtod(mreq, u_long *);
	mreq->m_len = 6 * NFSX_UNSIGNED;
	bpos = ((caddr_t)tl) + mreq->m_len;
	*tl++ = txdr_unsigned(nd->nd_retxid);
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH || (err & NFSERR_AUTHERR)) {
		*tl++ = rpc_msgdenied;
		if (err & NFSERR_AUTHERR) {
			*tl++ = rpc_autherr;
			*tl = txdr_unsigned(err & ~NFSERR_AUTHERR);
			mreq->m_len -= NFSX_UNSIGNED;
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = rpc_mismatch;
			*tl++ = txdr_unsigned(RPC_VER2);
			*tl = txdr_unsigned(RPC_VER2);
		}
	} else {
		*tl++ = rpc_msgaccepted;

		/*
		 * For Kerberos authentication, we must send the nickname
		 * verifier back, otherwise just RPCAUTH_NULL.
		 */
		if (nd->nd_flag & ND_KERBFULL) {
			register struct nfsuid *nuidp;
			struct timeval ktvin, ktvout;
			NFSKERBKEYSCHED_T keys;	/* stores key schedule */

			for (nuidp = NUIDHASH(slp, nd->nd_cr.cr_uid)->lh_first;
			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (nuidp->nu_cr.cr_uid == nd->nd_cr.cr_uid &&
				    (!nd->nd_nam2 || netaddr_match(NU_NETFAM(nuidp),
				     &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (nuidp) {
				ktvin.tv_sec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_sec - 1);
				ktvin.tv_usec =
				    txdr_unsigned(nuidp->nu_timestamp.tv_usec);

				/*
				 * Encrypt the timestamp in ecb mode using the
				 * session key.
				 */
#ifdef NFSKERB
				XXX
#endif

				*tl++ = rpc_auth_kerb;
				*tl++ = txdr_unsigned(3 * NFSX_UNSIGNED);
				*tl = ktvout.tv_sec;
				nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
				*tl++ = ktvout.tv_usec;
				*tl++ = txdr_unsigned(nuidp->nu_cr.cr_uid);
			} else {
				*tl++ = 0;
				*tl++ = 0;
			}
		} else {
			*tl++ = 0;
			*tl++ = 0;
		}
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (nd->nd_flag & ND_NQNFS) {
				*tl++ = txdr_unsigned(3);
				*tl = txdr_unsigned(3);
			} else {
				*tl++ = txdr_unsigned(2);
				*tl = txdr_unsigned(3);
			}
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		case EBADRPC:
			*tl = txdr_unsigned(RPC_GARBAGE);
			break;
		default:
			*tl = 0;
			if (err != NFSERR_RETVOID) {
				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
				if (err)
					*tl = txdr_unsigned(nfsrv_errmap(nd, err));
				else
					*tl = 0;
			}
			break;
		}
	}

	/*
	 * For nqnfs, piggyback lease as requested.
	 */
	if ((nd->nd_flag & ND_NQNFS) && err == 0) {
		if (nd->nd_flag & ND_LEASE) {
			nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(nd->nd_flag & ND_LEASE);
			*tl++ = txdr_unsigned(cache);
			*tl++ = txdr_unsigned(nd->nd_duration);
			txdr_hyper(frev, tl);
		} else {
			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
			*tl = 0;
		}
	}
	*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != NFSERR_RETVOID)
		nfsstats.srvrpc_errs++;
	return (0);
}
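/*
 * Illustrative sketch (not part of the original source): nfs_timer()
 * below stretches the base rto by nfs_backoff[] on successive timeouts,
 * i.e. by powers of two up to 256, with nm_timeouts clipped at 8.
 */
#ifdef notdef
static int
nfs_effective_timeo(nmp, base)
	register struct nfsmount *nmp;
	int base;
{

	if (nmp->nm_timeouts > 0)
		base *= nfs_backoff[nmp->nm_timeouts - 1];
	return (base);
}
#endif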
/*
 * Nfs timer routine
 * Scan the nfsreq list and retransmit any requests that have timed out
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0 (implies nm_retry == 0).
 */
void
nfs_timer(arg)
	void *arg;	/* never used */
{
	register struct nfsreq *rep;
	register struct mbuf *m;
	register struct socket *so;
	register struct nfsmount *nmp;
	register int timeo;
	register struct nfssvc_sock *slp;
	static long lasttime = 0;
	int s, error;
	u_quad_t cur_usec;

	s = splnet();
	for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
		nmp = rep->r_nmp;
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (rep->r_rtt >= 0) {
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
		}
		/*
		 * Check for server not responding
		 */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
		     rep->r_rexmit > nmp->nm_deadthresh) {
			nfs_msg(rep->r_procp,
			    nmp->nm_mountp->mnt_stat.f_mntfromname,
			    "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (nmp->nm_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}
		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		    (rep->r_flags & R_SENT) ||
		    nmp->nm_sent < nmp->nm_cwnd) &&
		   (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
				    (struct mbuf *)0, (struct mbuf *)0);
			else
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
				    nmp->nm_nam, (struct mbuf *)0);
			if (error) {
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
					so->so_error = 0;
			} else {
				/*
				 * Iff first send, start timing
				 * else turn timing off, backoff timer
				 * and divide congestion window by 2.
				 */
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_TIMING;
					if (++rep->r_rexmit > NFS_MAXREXMIT)
						rep->r_rexmit = NFS_MAXREXMIT;
					nmp->nm_cwnd >>= 1;
					if (nmp->nm_cwnd < NFS_CWNDSCALE)
						nmp->nm_cwnd = NFS_CWNDSCALE;
					nfsstats.rpcretries++;
				} else {
					rep->r_flags |= R_SENT;
					nmp->nm_sent += NFS_CWNDSCALE;
				}
				rep->r_rtt = 0;
			}
		}
	}

	/*
	 * Call the nqnfs server timer once a second to handle leases.
	 */
	if (lasttime != time.tv_sec) {
		lasttime = time.tv_sec;
		nqnfs_serverd();
	}

	/*
	 * Scan the write gathering queues for writes that need to be
	 * completed now.
	 */
	cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec;
	for (slp = nfssvc_sockhead.tqh_first; slp != 0;
	    slp = slp->ns_chain.tqe_next) {
	    if (slp->ns_tq.lh_first && slp->ns_tq.lh_first->nd_time <= cur_usec)
		nfsrv_wakenfsd(slp);
	}
	splx(s);
	timeout(nfs_timer, (void *)0, nfs_ticks);
}

/*
 * Test for a termination condition pending on the process.
 * This is used for NFSMNT_INT mounts.
 */
int
nfs_sigintr(nmp, rep, p)
	struct nfsmount *nmp;
	struct nfsreq *rep;
	register struct proc *p;
{

	if (rep && (rep->r_flags & R_SOFTTERM))
		return (EINTR);
	if (!(nmp->nm_flag & NFSMNT_INT))
		return (0);
	if (p && p->p_siglist &&
	    (((p->p_siglist & ~p->p_sigmask) & ~p->p_sigignore) &
	    NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
int
nfs_sndlock(flagp, rep)
	register int *flagp;
	struct nfsreq *rep;
{
	struct proc *p;
	int slpflag = 0, slptimeo = 0;

	if (rep) {
		p = rep->r_procp;
		if (rep->r_nmp->nm_flag & NFSMNT_INT)
			slpflag = PCATCH;
	} else
		p = (struct proc *)0;
	while (*flagp & NFSMNT_SNDLOCK) {
		if (nfs_sigintr(rep->r_nmp, rep, p))
			return (EINTR);
		*flagp |= NFSMNT_WANTSND;
		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
			slptimeo);
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	*flagp |= NFSMNT_SNDLOCK;
	return (0);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(flagp)
	register int *flagp;
{

	if ((*flagp & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*flagp &= ~NFSMNT_SNDLOCK;
	if (*flagp & NFSMNT_WANTSND) {
		*flagp &= ~NFSMNT_WANTSND;
		wakeup((caddr_t)flagp);
	}
}

int
nfs_rcvlock(rep)
	register struct nfsreq *rep;
{
	register int *flagp = &rep->r_nmp->nm_flag;
	int slpflag, slptimeo = 0;

	if (*flagp & NFSMNT_INT)
		slpflag = PCATCH;
	else
		slpflag = 0;
	while (*flagp & NFSMNT_RCVLOCK) {
		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
			return (EINTR);
		*flagp |= NFSMNT_WANTRCV;
		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
			slptimeo);
		if (slpflag == PCATCH) {
			slpflag = 0;
			slptimeo = 2 * hz;
		}
	}
	*flagp |= NFSMNT_RCVLOCK;
	return (0);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_rcvunlock(flagp)
	register int *flagp;
{

	if ((*flagp & NFSMNT_RCVLOCK) == 0)
		panic("nfs rcvunlock");
	*flagp &= ~NFSMNT_RCVLOCK;
	if (*flagp & NFSMNT_WANTRCV) {
		*flagp &= ~NFSMNT_WANTRCV;
		wakeup((caddr_t)flagp);
	}
}
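/*
 * Illustrative sketch (not part of the original source): the usual
 * pairing of the flag based sleep locks above around a send on a
 * connection oriented socket, mirroring what nfs_request() does.
 */
#ifdef notdef
static int
nfs_locked_send(nmp, rep, m)
	register struct nfsmount *nmp;
	struct nfsreq *rep;
	struct mbuf *m;
{
	int error;

	error = nfs_sndlock(&nmp->nm_flag, rep);
	if (error)
		return (error);
	error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
	nfs_sndunlock(&nmp->nm_flag);
	return (error);
}
#endif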
/*
 * Check for badly aligned mbuf data areas and
 * realign data in an mbuf list by copying the data areas up, as required.
 */
void
nfs_realign(m, hsiz)
	register struct mbuf *m;
	int hsiz;
{
	register struct mbuf *m2;
	register int siz, mlen, olen;
	register caddr_t tcp, fcp;
	struct mbuf *mnew;

	while (m) {
	    /*
	     * This never happens for UDP, rarely happens for TCP
	     * but frequently happens for iso transport.
	     */
	    if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
		olen = m->m_len;
		fcp = mtod(m, caddr_t);
		if ((int)fcp & 0x3) {
			m->m_flags &= ~M_PKTHDR;
			if (m->m_flags & M_EXT)
				m->m_data = m->m_ext.ext_buf +
					((m->m_ext.ext_size - olen) & ~0x3);
			else
				m->m_data = m->m_dat;
		}
		m->m_len = 0;
		tcp = mtod(m, caddr_t);
		mnew = m;
		m2 = m->m_next;

		/*
		 * If possible, only put the first invariant part
		 * of the RPC header in the first mbuf.
		 */
		mlen = M_TRAILINGSPACE(m);
		if (olen <= hsiz && mlen > hsiz)
			mlen = hsiz;

		/*
		 * Loop through the mbuf list consolidating data.
		 */
		while (m) {
			while (olen > 0) {
				if (mlen == 0) {
					m2->m_flags &= ~M_PKTHDR;
					if (m2->m_flags & M_EXT)
						m2->m_data = m2->m_ext.ext_buf;
					else
						m2->m_data = m2->m_dat;
					m2->m_len = 0;
					mlen = M_TRAILINGSPACE(m2);
					tcp = mtod(m2, caddr_t);
					mnew = m2;
					m2 = m2->m_next;
				}
				siz = min(mlen, olen);
				if (tcp != fcp)
					bcopy(fcp, tcp, siz);
				mnew->m_len += siz;
				mlen -= siz;
				olen -= siz;
				tcp += siz;
				fcp += siz;
			}
			m = m->m_next;
			if (m) {
				olen = m->m_len;
				fcp = mtod(m, caddr_t);
			}
		}

		/*
		 * Finally, set m_len == 0 for any trailing mbufs that have
		 * been copied out of.
		 */
		while (m2) {
			m2->m_len = 0;
			m2 = m2->m_next;
		}
		return;
	    }
	    m = m->m_next;
	}
}
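/*
 * Illustrative sketch (not part of the original source): the dissect
 * macros cast m_data straight to u_long *, which is why nfs_realign()
 * insists on the two conditions below before leaving an mbuf alone.
 */
#ifdef notdef
static int
nfs_is_aligned(m)
	register struct mbuf *m;
{

	return ((m->m_len & 0x3) == 0 && (mtod(m, int) & 0x3) == 0);
}
#endif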
/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with M_WAIT from an nfsd.
 */
void
nfsrv_rcv(so, arg, waitflag)
	struct socket *so;
	caddr_t arg;
	int waitflag;
{
	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	register struct mbuf *m;
	struct mbuf *mp, *nam;
	struct uio auio;
	int flags, error;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == M_DONTWAIT) {
		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
	}
#endif
	auio.uio_procp = NULL;
	if (so->so_type == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == M_DONTWAIT) {
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
		if (error || mp == (struct mbuf *)0) {
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		error = nfsrv_getstream(slp, waitflag);
		if (error) {
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			error = soreceive(so, &nam, &auio, &mp,
						(struct mbuf **)0, &flags);
			if (mp) {
				nfs_realign(mp, 10 * NFSX_UNSIGNED);
				if (nam) {
					m = nam;
					m->m_next = mp;
				} else
					m = mp;
				if (slp->ns_recend)
					slp->ns_recend->m_nextpkt = m;
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				m->m_nextpkt = (struct mbuf *)0;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
					&& error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
		(slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
}
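/*
 * Illustrative sketch (not part of the original source): nfsrv_rcv() is
 * installed as the socket upcall when a server socket is handed to the
 * nfsd layer, roughly as follows.
 */
#ifdef notdef
static void
nfsrv_set_upcall(slp, so)
	register struct nfssvc_sock *slp;
	struct socket *so;
{

	so->so_upcallarg = (caddr_t)slp;
	so->so_upcall = nfsrv_rcv;
	so->so_rcv.sb_flags |= SB_UPCALL;
}
#endif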
/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
int
nfsrv_getstream(slp, waitflag)
	register struct nfssvc_sock *slp;
	int waitflag;
{
	register struct mbuf *m, **mpp;
	register char *cp1, *cp2;
	register int len;
	struct mbuf *om, *m2, *recm = 0;
	u_long recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
	    if (slp->ns_reclen == 0) {
		if (slp->ns_cc < NFSX_UNSIGNED) {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}
		m = slp->ns_raw;
		if (m->m_len >= NFSX_UNSIGNED) {
			bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
			m->m_data += NFSX_UNSIGNED;
			m->m_len -= NFSX_UNSIGNED;
		} else {
			cp1 = (caddr_t)&recmark;
			cp2 = mtod(m, caddr_t);
			while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
				while (m->m_len == 0) {
					m = m->m_next;
					cp2 = mtod(m, caddr_t);
				}
				*cp1++ = *cp2++;
				m->m_data++;
				m->m_len--;
			}
		}
		slp->ns_cc -= NFSX_UNSIGNED;
		recmark = ntohl(recmark);
		slp->ns_reclen = recmark & ~0x80000000;
		if (recmark & 0x80000000)
			slp->ns_flag |= SLP_LASTFRAG;
		else
			slp->ns_flag &= ~SLP_LASTFRAG;
		if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (EPERM);
		}
	    }

	    /*
	     * Now get the record part.
	     */
	    if (slp->ns_cc == slp->ns_reclen) {
		recm = slp->ns_raw;
		slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
		slp->ns_cc = slp->ns_reclen = 0;
	    } else if (slp->ns_cc > slp->ns_reclen) {
		len = 0;
		m = slp->ns_raw;
		om = (struct mbuf *)0;
		while (len < slp->ns_reclen) {
			if ((len + m->m_len) > slp->ns_reclen) {
				m2 = m_copym(m, 0, slp->ns_reclen - len,
					waitflag);
				if (m2) {
					if (om) {
						om->m_next = m2;
						recm = slp->ns_raw;
					} else
						recm = m2;
					m->m_data += slp->ns_reclen - len;
					m->m_len -= slp->ns_reclen - len;
					len = slp->ns_reclen;
				} else {
					slp->ns_flag &= ~SLP_GETSTREAM;
					return (EWOULDBLOCK);
				}
			} else if ((len + m->m_len) == slp->ns_reclen) {
				om = m;
				len += m->m_len;
				m = m->m_next;
				recm = slp->ns_raw;
				om->m_next = (struct mbuf *)0;
			} else {
				om = m;
				len += m->m_len;
				m = m->m_next;
			}
		}
		slp->ns_raw = m;
		slp->ns_cc -= len;
		slp->ns_reclen = 0;
	    } else {
		slp->ns_flag &= ~SLP_GETSTREAM;
		return (0);
	    }

	    /*
	     * Accumulate the fragments into a record.
	     */
	    mpp = &slp->ns_frag;
	    while (*mpp)
		mpp = &((*mpp)->m_next);
	    *mpp = recm;
	    if (slp->ns_flag & SLP_LASTFRAG) {
		nfs_realign(slp->ns_frag, 10 * NFSX_UNSIGNED);
		if (slp->ns_recend)
		    slp->ns_recend->m_nextpkt = slp->ns_frag;
		else
		    slp->ns_rec = slp->ns_frag;
		slp->ns_recend = slp->ns_frag;
		slp->ns_frag = (struct mbuf *)0;
	    }
	}
}

/*
 * Parse an RPC header.
 */
int
nfsrv_dorec(slp, nfsd, ndp)
	register struct nfssvc_sock *slp;
	struct nfsd *nfsd;
	struct nfsrv_descript **ndp;
{
	register struct mbuf *m, *nam;
	register struct nfsrv_descript *nd;
	int error;

	*ndp = NULL;
	if ((slp->ns_flag & SLP_VALID) == 0 ||
	    (m = slp->ns_rec) == (struct mbuf *)0)
		return (ENOBUFS);
	slp->ns_rec = m->m_nextpkt;
	if (slp->ns_rec)
		m->m_nextpkt = (struct mbuf *)0;
	else
		slp->ns_recend = (struct mbuf *)0;
	if (m->m_type == MT_SONAME) {
		nam = m;
		m = m->m_next;
		nam->m_next = NULL;
	} else
		nam = NULL;
	MALLOC(nd, struct nfsrv_descript *, sizeof (struct nfsrv_descript),
		M_NFSRVDESC, M_WAITOK);
	nd->nd_md = nd->nd_mrep = m;
	nd->nd_nam2 = nam;
	nd->nd_dpos = mtod(m, caddr_t);
	error = nfs_getreq(nd, nfsd, TRUE);
	if (error) {
		m_freem(nam);
		free((caddr_t)nd, M_NFSRVDESC);
		return (error);
	}
	*ndp = nd;
	nfsd->nfsd_nd = nd;
	return (0);
}
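/*
 * Illustrative sketch (not part of the original source, and the
 * dispatch arguments are assumptions): how an nfsd loop would pull one
 * parsed request off a socket with nfsrv_dorec() and dispatch it
 * through nfsrv3_procs[].
 */
#ifdef notdef
static int
nfsrv_serve_one(slp, nfsd, procp)
	register struct nfssvc_sock *slp;
	struct nfsd *nfsd;
	struct proc *procp;
{
	struct nfsrv_descript *nd;
	struct mbuf *mreq;
	int error;

	error = nfsrv_dorec(slp, nfsd, &nd);
	if (error)
		return (error);		/* e.g. ENOBUFS: no record queued */
	return ((*(nfsrv3_procs[nd->nd_procnum]))(nd, slp, procp, &mreq));
}
#endif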
/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
int
nfs_getreq(nd, nfsd, has_header)
	register struct nfsrv_descript *nd;
	struct nfsd *nfsd;
	int has_header;
{
	register int len, i;
	register u_long *tl;
	register long t1;
	struct uio uio;
	struct iovec iov;
	caddr_t dpos, cp2, cp;
	u_long nfsvers, auth_type;
	uid_t nickuid;
	int error = 0, nqnfs = 0, ticklen;
	struct mbuf *mrep, *md;
	register struct nfsuid *nuidp;
	struct timeval tvin, tvout;
	NFSKERBKEYSCHED_T keys;	/* stores key schedule */

	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		nfsm_dissect(tl, u_long *, 10 * NFSX_UNSIGNED);
		nd->nd_retxid = fxdr_unsigned(u_long, *tl++);
		if (*tl++ != rpc_call) {
			m_freem(mrep);
			return (EBADRPC);
		}
	} else
		nfsm_dissect(tl, u_long *, 8 * NFSX_UNSIGNED);
	nd->nd_repstat = 0;
	nd->nd_flag = 0;
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (*tl != nfs_prog) {
		if (*tl == nqnfs_prog)
			nqnfs++;
		else {
			nd->nd_repstat = EPROGUNAVAIL;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	}
	tl++;
	nfsvers = fxdr_unsigned(u_long, *tl++);
	if (((nfsvers < NFS_VER2 || nfsvers > NFS_VER3) && !nqnfs) ||
		(nfsvers != NQNFS_VER3 && nqnfs)) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if (nqnfs)
		nd->nd_flag = (ND_NFSV3 | ND_NQNFS);
	else if (nfsvers == NFS_VER3)
		nd->nd_flag = ND_NFSV3;
	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	if (nd->nd_procnum >= NFS_NPROCS ||
		(!nqnfs && nd->nd_procnum >= NQNFSPROC_GETLEASE) ||
		(!nd->nd_flag && nd->nd_procnum > NFSV2PROC_STATFS)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	if ((nd->nd_flag & ND_NFSV3) == 0)
		nd->nd_procnum = nfsv3_procid[nd->nd_procnum];
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}

	nd->nd_flag &= ~ND_KERBAUTH;
	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
		bzero((caddr_t)&nd->nd_cr, sizeof (struct ucred));
		nd->nd_cr.cr_ref = 1;
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_dissect(tl, u_long *, (len + 2) * NFSX_UNSIGNED);
		for (i = 1; i <= len; i++)
		    if (i < NGROUPS)
			nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
		    else
			tl++;
		nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
		if (nd->nd_cr.cr_ngroups > 1)
		    nfsrvw_sort(nd->nd_cr.cr_groups, nd->nd_cr.cr_ngroups);
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > RPCAUTH_MAXSIZ) {
			m_freem(mrep);
			return (EBADRPC);
		}
		if (len > 0)
			nfsm_adv(nfsm_rndup(len));
	} else if (auth_type == rpc_auth_kerb) {
		switch (fxdr_unsigned(int, *tl++)) {
		case RPCAKN_FULLNAME:
			ticklen = fxdr_unsigned(int, *tl);
			*((u_long *)nfsd->nfsd_authstr) = *tl;
			uio.uio_resid = nfsm_rndup(ticklen) + NFSX_UNSIGNED;
			nfsd->nfsd_authlen = uio.uio_resid + NFSX_UNSIGNED;
			if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
				m_freem(mrep);
				return (EBADRPC);
			}
			uio.uio_offset = 0;
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_segflg = UIO_SYSSPACE;
			iov.iov_base = (caddr_t)&nfsd->nfsd_authstr[4];
			iov.iov_len = RPCAUTH_MAXSIZ - 4;
			nfsm_mtouio(&uio, uio.uio_resid);
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 4 * NFSX_UNSIGNED) {
				printf("Bad kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(cp, caddr_t, 4 * NFSX_UNSIGNED);
			tl = (u_long *)cp;
			if (fxdr_unsigned(int, *tl) != RPCAKN_FULLNAME) {
				printf("Not fullname kerb verifier\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			cp += NFSX_UNSIGNED;
			bcopy(cp, nfsd->nfsd_verfstr, 3 * NFSX_UNSIGNED);
			nfsd->nfsd_verflen = 3 * NFSX_UNSIGNED;
			nd->nd_flag |= ND_KERBFULL;
			nfsd->nfsd_flag |= NFSD_NEEDAUTH;
			break;
		case RPCAKN_NICKNAME:
			if (len != 2 * NFSX_UNSIGNED) {
				printf("Kerb nickname short\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nickuid = fxdr_unsigned(uid_t, *tl);
			nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
			if (*tl++ != rpc_auth_kerb ||
				fxdr_unsigned(int, *tl) != 3 * NFSX_UNSIGNED) {
				printf("Kerb nick verifier bad\n");
				nd->nd_repstat = (NFSERR_AUTHERR|AUTH_BADVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
			tvin.tv_sec = *tl++;
			tvin.tv_usec = *tl;

			for (nuidp = NUIDHASH(nfsd->nfsd_slp, nickuid)->lh_first;
			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
				if (nuidp->nu_cr.cr_uid == nickuid &&
				    (!nd->nd_nam2 ||
				     netaddr_match(NU_NETFAM(nuidp),
				      &nuidp->nu_haddr, nd->nd_nam2)))
					break;
			}
			if (!nuidp) {
				nd->nd_repstat =
					(NFSERR_AUTHERR|AUTH_REJECTCRED);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}

			/*
			 * Now, decrypt the timestamp using the session key
			 * and validate it.
			 */
#ifdef NFSKERB
			XXX
#endif

			tvout.tv_sec = fxdr_unsigned(long, tvout.tv_sec);
			tvout.tv_usec = fxdr_unsigned(long, tvout.tv_usec);
			if (nuidp->nu_expire < time.tv_sec ||
			    nuidp->nu_timestamp.tv_sec > tvout.tv_sec ||
			    (nuidp->nu_timestamp.tv_sec == tvout.tv_sec &&
			     nuidp->nu_timestamp.tv_usec > tvout.tv_usec)) {
				nuidp->nu_expire = 0;
				nd->nd_repstat =
				    (NFSERR_AUTHERR|AUTH_REJECTVERF);
				nd->nd_procnum = NFSPROC_NOOP;
				return (0);
			}
			nfsrv_setcred(&nuidp->nu_cr, &nd->nd_cr);
			nd->nd_flag |= ND_KERBNICK;
		}
	} else {
		nd->nd_repstat = (NFSERR_AUTHERR | AUTH_REJECTCRED);
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}

	/*
	 * For nqnfs, get piggybacked lease request.
	 */
	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		nd->nd_flag |= fxdr_unsigned(int, *tl);
		if (nd->nd_flag & ND_LEASE) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			nd->nd_duration = fxdr_unsigned(int, *tl);
		} else
			nd->nd_duration = NQ_MINLEASE;
	} else
		nd->nd_duration = NQ_MINLEASE;
	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	return (error);
}

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 * running nfsds will go look for the work in the nfssvc_sock list.
 */
void
nfsrv_wakenfsd(slp)
	struct nfssvc_sock *slp;
{
	register struct nfsd *nd;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
	for (nd = nfsd_head.tqh_first; nd != 0; nd = nd->nfsd_chain.tqe_next) {
		if (nd->nfsd_flag & NFSD_WAITING) {
			nd->nfsd_flag &= ~NFSD_WAITING;
			if (nd->nfsd_slp)
				panic("nfsd wakeup");
			slp->ns_sref++;
			nd->nfsd_slp = slp;
			wakeup((caddr_t)nd);
			return;
		}
	}
	slp->ns_flag |= SLP_DOREC;
	nfsd_head_flag |= NFSD_CHECKSLP;
}

int
nfs_msg(p, server, msg)
	struct proc *p;
	char *server, *msg;
{
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
	return (0);
}