/*
 * Copyright (c) 1989, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)nfs_socket.c	7.30 (Berkeley) 05/11/92
 */

/*
 * Socket operations for use by nfs
 */

#include "types.h"
#include "param.h"
#include "uio.h"
#include "proc.h"
#include "signal.h"
#include "mount.h"
#include "kernel.h"
#include "malloc.h"
#include "mbuf.h"
#include "vnode.h"
#include "domain.h"
#include "protosw.h"
#include "socket.h"
#include "socketvar.h"
#include "syslog.h"
#include "tprintf.h"
#include "machine/endian.h"
#include "netinet/in.h"
#include "netinet/tcp.h"
#ifdef ISO
#include "netiso/iso.h"
#endif
#include "ufs/ufs/quota.h"
#include "ufs/ufs/ufsmount.h"
#include "rpcv2.h"
#include "nfsv2.h"
#include "nfs.h"
#include "xdr_subs.h"
#include "nfsm_subs.h"
#include "nfsmount.h"
#include "nfsnode.h"
#include "nfsrtt.h"
#include "nqnfs.h"

#define	TRUE	1
#define	FALSE	0

int netnetnet = sizeof (struct netaddrhash);
/*
 * Estimate rto for an nfs rpc sent via an unreliable datagram.
 * Use the mean and mean deviation of rtt for the appropriate type of rpc
 * for the frequent rpcs and a default for the others.
 * The justification for doing "other" this way is that these rpcs
 * happen so infrequently that timer est. would probably be stale.
 * Also, since many of these rpcs are
 * non-idempotent, a conservative timeout is desired.
 * getattr, lookup - A+2D
 * read, write - A+4D
 * other - nm_timeo
 */
#define	NFS_RTO(n, t) \
	((t) == 0 ? (n)->nm_timeo : \
	 ((t) < 3 ? \
	  (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
	  ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
/*
 * External data, mostly RPC constants in XDR form
 */
extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
	rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred,
	rpc_auth_kerb;
extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;
extern time_t nqnfsstarttime;
extern int nonidempotent[NFS_NPROCS];

/*
 * Maps errno values to nfs error numbers.
 * Use NFSERR_IO as the catch all for ones not specifically defined in
 * RFC 1094.
 */
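/*
 * Informal note: the table below is indexed by errno - 1 (see the
 * nfsrv_errmap[err - 1] lookup in nfs_rephead() later in this file), so
 * errno values with no specific NFS equivalent in RFC 1094 simply map to
 * the NFSERR_IO entry.
 */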
static int nfsrv_errmap[ELAST] = {
	NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR,
	NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS,
	NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO,
	NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE,
	NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
	NFSERR_IO,
};

/*
 * Defines which timer to use for the procnum.
 * 0 - default
 * 1 - getattr
 * 2 - lookup
 * 3 - read
 * 4 - write
 */
static int proct[NFS_NPROCS] = {
	0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0,
};

/*
 * There is a congestion window for outstanding rpcs maintained per mount
 * point. The cwnd size is adjusted in roughly the way that:
 * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
 * SIGCOMM '88". ACM, August 1988.
 * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
 * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
 * of rpcs is in progress.
 * (The sent count and cwnd are scaled for integer arith.)
 * Variants of "slow start" were tried and were found to be too much of a
 * performance hit (ave. rtt 3 times larger),
 * I suspect due to the large rtt that nfs rpcs have.
 */
#define	NFS_CWNDSCALE	256
#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
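/*
 * Illustrative arithmetic for the scaled window: with NFS_CWNDSCALE = 256,
 * one outstanding rpc counts as 256 units and NFS_MAXCWND permits 32 rpcs.
 * In nfs_reply(), a full window of, say, nm_cwnd = 4096 (16 rpcs) grows by
 * about (256*256)/4096 = 16 units per reply, so roughly one window's worth
 * of replies is needed to add a single rpc slot (additive increase), while
 * nfs_timer() halves nm_cwnd on a retransmit, never below NFS_CWNDSCALE.
 */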
int	nfs_sbwait();
void	nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
void	nfs_rcvunlock(), nqnfs_serverd();
struct mbuf *nfsm_rpchead();
int nfsrtton = 0;
struct nfsrtt nfsrtt;
struct nfsd nfsd_head;

int	nfsrv_null(),
	nfsrv_getattr(),
	nfsrv_setattr(),
	nfsrv_lookup(),
	nfsrv_readlink(),
	nfsrv_read(),
	nfsrv_write(),
	nfsrv_create(),
	nfsrv_remove(),
	nfsrv_rename(),
	nfsrv_link(),
	nfsrv_symlink(),
	nfsrv_mkdir(),
	nfsrv_rmdir(),
	nfsrv_readdir(),
	nfsrv_statfs(),
	nfsrv_noop(),
	nqnfsrv_readdirlook(),
	nqnfsrv_getlease(),
	nqnfsrv_vacated();

int (*nfsrv_procs[NFS_NPROCS])() = {
	nfsrv_null,
	nfsrv_getattr,
	nfsrv_setattr,
	nfsrv_noop,
	nfsrv_lookup,
	nfsrv_readlink,
	nfsrv_read,
	nfsrv_noop,
	nfsrv_write,
	nfsrv_create,
	nfsrv_remove,
	nfsrv_rename,
	nfsrv_link,
	nfsrv_symlink,
	nfsrv_mkdir,
	nfsrv_rmdir,
	nfsrv_readdir,
	nfsrv_statfs,
	nqnfsrv_readdirlook,
	nqnfsrv_getlease,
	nqnfsrv_vacated,
};

struct nfsreq nfsreqh;

/*
 * Initialize sockets and congestion for a new NFS connection.
 * We do not free the sockaddr if error.
 */
nfs_connect(nmp, rep)
	register struct nfsmount *nmp;
	struct nfsreq *rep;
{
	register struct socket *so;
	int s, error, rcvreserve, sndreserve;
	struct sockaddr *saddr;
	struct sockaddr_in *sin;
	struct mbuf *m;
	u_short tport;

	nmp->nm_so = (struct socket *)0;
	saddr = mtod(nmp->nm_nam, struct sockaddr *);
	if (error = socreate(saddr->sa_family,
	    &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
		goto bad;
	so = nmp->nm_so;
	nmp->nm_soflags = so->so_proto->pr_flags;

	/*
	 * Some servers require that the client port be a reserved port number.
	 */
	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
		MGET(m, M_WAIT, MT_SONAME);
		sin = mtod(m, struct sockaddr_in *);
		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
		tport = IPPORT_RESERVED - 1;
		sin->sin_port = htons(tport);
		while ((error = sobind(so, m)) == EADDRINUSE &&
		       --tport > IPPORT_RESERVED / 2)
			sin->sin_port = htons(tport);
		m_freem(m);
		if (error)
			goto bad;
	}

	/*
	 * Protocols that do not require connections may be optionally left
	 * unconnected for servers that reply from a port other than NFS_PORT.
	 */
	if (nmp->nm_flag & NFSMNT_NOCONN) {
		if (nmp->nm_soflags & PR_CONNREQUIRED) {
			error = ENOTCONN;
			goto bad;
		}
	} else {
		if (error = soconnect(so, nmp->nm_nam))
			goto bad;

		/*
		 * Wait for the connection to complete. Cribbed from the
		 * connect system call but with the wait timing out so
		 * that interruptible mounts don't hang here for a long time.
		 */
		s = splnet();
		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
				"nfscon", 2 * hz);
			if ((so->so_state & SS_ISCONNECTING) &&
			    so->so_error == 0 && rep &&
			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
				so->so_state &= ~SS_ISCONNECTING;
				splx(s);
				goto bad;
			}
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			splx(s);
			goto bad;
		}
		splx(s);
	}
	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
		so->so_rcv.sb_timeo = (5 * hz);
		so->so_snd.sb_timeo = (5 * hz);
	} else {
		so->so_rcv.sb_timeo = 0;
		so->so_snd.sb_timeo = 0;
	}
	if (nmp->nm_sotype == SOCK_DGRAM) {
		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
	} else {
		if (nmp->nm_sotype != SOCK_STREAM)
			panic("nfscon sotype");
		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
		}
		if (so->so_proto->pr_protocol == IPPROTO_TCP) {
			MGET(m, M_WAIT, MT_SOOPTS);
			*mtod(m, int *) = 1;
			m->m_len = sizeof(int);
			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
		}
		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
			* 2;
		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
			* 2;
	}
	if (error = soreserve(so, sndreserve, rcvreserve))
		goto bad;
	so->so_rcv.sb_flags |= SB_NOINTR;
	so->so_snd.sb_flags |= SB_NOINTR;

	/* Initialize other non-zero congestion variables */
	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
		nmp->nm_srtt[4] = (NFS_TIMEO << 3);
	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
		nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
	nmp->nm_cwnd = NFS_MAXCWND / 2;		/* Initial send window */
	nmp->nm_sent = 0;
	nmp->nm_timeouts = 0;
	return (0);

bad:
	nfs_disconnect(nmp);
	return (error);
}

/*
 * Reconnect routine:
 * Called when a connection is broken on a reliable protocol.
 * - clean up the old socket
 * - nfs_connect() again
 * - set R_MUSTRESEND for all outstanding requests on mount point
 * If this fails the mount point is DEAD!
 * nb: Must be called with the nfs_sndlock() set on the mount point.
 */
nfs_reconnect(rep)
	register struct nfsreq *rep;
{
	register struct nfsreq *rp;
	register struct nfsmount *nmp = rep->r_nmp;
	int error;

	nfs_disconnect(nmp);
	while (error = nfs_connect(nmp, rep)) {
		if (error == EINTR || error == ERESTART)
			return (EINTR);
		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
	}

	/*
	 * Loop through outstanding request list and fix up all requests
	 * on old socket.
	 */
	rp = nfsreqh.r_next;
	while (rp != &nfsreqh) {
		if (rp->r_nmp == nmp)
			rp->r_flags |= R_MUSTRESEND;
		rp = rp->r_next;
	}
	return (0);
}

/*
 * NFS disconnect. Clean up and unlink.
 */
void
nfs_disconnect(nmp)
	register struct nfsmount *nmp;
{
	register struct socket *so;

	if (nmp->nm_so) {
		so = nmp->nm_so;
		nmp->nm_so = (struct socket *)0;
		soshutdown(so, 2);
		soclose(so);
	}
}

/*
 * This is the nfs send routine. For connection based socket types, it
 * must be called with an nfs_sndlock() on the socket.
 * "rep == NULL" indicates that it has been called from a server.
 * For the client side:
 * - return EINTR if the RPC is terminated, 0 otherwise
 * - set R_MUSTRESEND if the send fails for any reason
 * - do any cleanup required by recoverable socket errors (???)
 * For the server side:
 * - return EINTR or ERESTART if interrupted by a signal
 * - return EPIPE if a connection is lost for connection based sockets (TCP...)
 * - do any cleanup required by recoverable socket errors (???)
 */
nfs_send(so, nam, top, rep)
	register struct socket *so;
	struct mbuf *nam;
	register struct mbuf *top;
	struct nfsreq *rep;
{
	struct mbuf *sendnam;
	int error, soflags, flags;

	if (rep) {
		if (rep->r_flags & R_SOFTTERM) {
			m_freem(top);
			return (EINTR);
		}
		if ((so = rep->r_nmp->nm_so) == NULL) {
			rep->r_flags |= R_MUSTRESEND;
			m_freem(top);
			return (0);
		}
		rep->r_flags &= ~R_MUSTRESEND;
		soflags = rep->r_nmp->nm_soflags;
	} else
		soflags = so->so_proto->pr_flags;
	if ((soflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
		sendnam = (struct mbuf *)0;
	else
		sendnam = nam;
	if (so->so_type == SOCK_SEQPACKET)
		flags = MSG_EOR;
	else
		flags = 0;

	error = sosend(so, sendnam, (struct uio *)0, top,
		(struct mbuf *)0, flags);
	if (error) {
		if (rep) {
			log(LOG_INFO, "nfs send error %d for server %s\n", error,
			    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			/*
			 * Deal with errors for the client side.
			 */
			if (rep->r_flags & R_SOFTTERM)
				error = EINTR;
			else
				rep->r_flags |= R_MUSTRESEND;
		} else
			log(LOG_INFO, "nfsd send error %d\n", error);

		/*
		 * Handle any recoverable (soft) socket errors here. (???)
		 */
		if (error != EINTR && error != ERESTART &&
		    error != EWOULDBLOCK && error != EPIPE)
			error = 0;
	}
	return (error);
}

/*
 * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
 * done by soreceive(), but for SOCK_STREAM we must deal with the Record
 * Mark and consolidate the data into a new mbuf list.
 * nb: Sometimes TCP passes the data up to soreceive() in long lists of
 *     small mbufs.
 * For SOCK_STREAM we must be very careful to read an entire record once
 * we have read any of it, even if the system call has been interrupted.
 */
nfs_receive(rep, aname, mp)
	register struct nfsreq *rep;
	struct mbuf **aname;
	struct mbuf **mp;
{
	register struct socket *so;
	struct uio auio;
	struct iovec aio;
	register struct mbuf *m;
	struct mbuf *control;
	u_long len;
	struct mbuf **getnam;
	int error, sotype, rcvflg;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Set up arguments for soreceive()
	 */
	*mp = (struct mbuf *)0;
	*aname = (struct mbuf *)0;
	sotype = rep->r_nmp->nm_sotype;

	/*
	 * For reliable protocols, lock against other senders/receivers
	 * in case a reconnect is necessary.
	 * For SOCK_STREAM, first get the Record Mark to find out how much
	 * more there is to get.
	 * We must lock the socket against other receivers
	 * until we have an entire rpc request/reply.
	 */
	if (sotype != SOCK_DGRAM) {
		if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
			return (error);
tryagain:
		/*
		 * Check for fatal errors and resending request.
		 */
		/*
		 * Ugh: If a reconnect attempt just happened, nm_so
		 * would have changed. NULL indicates a failed
		 * attempt that has essentially shut down this
		 * mount point.
		 */
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
			nfs_sndunlock(&rep->r_nmp->nm_flag);
			return (EINTR);
		}
		if ((so = rep->r_nmp->nm_so) == NULL) {
			if (error = nfs_reconnect(rep)) {
				nfs_sndunlock(&rep->r_nmp->nm_flag);
				return (error);
			}
			goto tryagain;
		}
		while (rep->r_flags & R_MUSTRESEND) {
			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
			nfsstats.rpcretries++;
			if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
				if (error == EINTR || error == ERESTART ||
				    (error = nfs_reconnect(rep))) {
					nfs_sndunlock(&rep->r_nmp->nm_flag);
					return (error);
				}
				goto tryagain;
			}
		}
		nfs_sndunlock(&rep->r_nmp->nm_flag);
		if (sotype == SOCK_STREAM) {
			aio.iov_base = (caddr_t) &len;
			aio.iov_len = sizeof(u_long);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(u_long);
			auio.uio_procp = p;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0, &auio,
				    (struct mbuf **)0, (struct mbuf **)0, &rcvflg);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%d/%d) from nfs server %s\n",
				    sizeof(u_long) - auio.uio_resid,
				    sizeof(u_long),
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
			if (error)
				goto errout;
			len = ntohl(len) & ~0x80000000;
			/*
			 * This is SERIOUS! We are out of sync with the sender
			 * and forcing a disconnect/reconnect is all I can do.
			 */
			if (len > NFS_MAXPACKET) {
				log(LOG_ERR, "%s (%d) from nfs server %s\n",
				    "impossible packet length",
				    len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EFBIG;
				goto errout;
			}
			auio.uio_resid = len;
			do {
				rcvflg = MSG_WAITALL;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, (struct mbuf **)0, &rcvflg);
			} while (error == EWOULDBLOCK || error == EINTR ||
				 error == ERESTART);
			if (!error && auio.uio_resid > 0) {
				log(LOG_INFO,
				    "short receive (%d/%d) from nfs server %s\n",
				    len - auio.uio_resid, len,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
				error = EPIPE;
			}
		} else {
			/*
			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
			 * and soreceive() will return when it has either a
			 * control msg or a data msg.
			 * We have no use for control msg., but must grab them
			 * and then throw them away so we know what is going
			 * on.
			 */
			auio.uio_resid = len = 100000000;	/* Anything Big */
			auio.uio_procp = p;
			do {
				rcvflg = 0;
				error = soreceive(so, (struct mbuf **)0,
				    &auio, mp, &control, &rcvflg);
				if (control)
					m_freem(control);
				if (error == EWOULDBLOCK && rep) {
					if (rep->r_flags & R_SOFTTERM)
						return (EINTR);
				}
			} while (error == EWOULDBLOCK ||
				 (!error && *mp == NULL && control));
			if ((rcvflg & MSG_EOR) == 0)
				printf("Egad!!\n");
			if (!error && *mp == NULL)
				error = EPIPE;
			len -= auio.uio_resid;
		}
errout:
		if (error && error != EINTR && error != ERESTART) {
			m_freem(*mp);
			*mp = (struct mbuf *)0;
			if (error != EPIPE)
				log(LOG_INFO,
				    "receive error %d from nfs server %s\n",
				    error,
				    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
			if (!error)
				error = nfs_reconnect(rep);
			if (!error)
				goto tryagain;
		}
	} else {
		if ((so = rep->r_nmp->nm_so) == NULL)
			return (EACCES);
		if (so->so_state & SS_ISCONNECTED)
			getnam = (struct mbuf **)0;
		else
			getnam = aname;
		auio.uio_resid = len = 1000000;
		auio.uio_procp = p;
		do {
			rcvflg = 0;
			error = soreceive(so, getnam, &auio, mp,
				(struct mbuf **)0, &rcvflg);
			if (error == EWOULDBLOCK &&
			    (rep->r_flags & R_SOFTTERM))
				return (EINTR);
		} while (error == EWOULDBLOCK);
		len -= auio.uio_resid;
	}
	if (error) {
		m_freem(*mp);
		*mp = (struct mbuf *)0;
	}
	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long
	 * or with m_data not longword aligned.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	nfs_realign(*mp, 5 * NFSX_UNSIGNED);
	return (error);
}

/*
 * Implement receipt of reply on a socket.
 * We must search through the list of received datagrams matching them
 * with outstanding requests using the xid, until ours is found.
 */
/* ARGSUSED */
nfs_reply(myrep)
	struct nfsreq *myrep;
{
	register struct nfsreq *rep;
	register struct nfsmount *nmp = myrep->r_nmp;
	register long t1;
	struct mbuf *mrep, *nam, *md;
	u_long rxid, *tl;
	caddr_t dpos, cp2;
	int error;

	/*
	 * Loop around until we get our own reply
	 */
	for (;;) {
		/*
		 * Lock against other receivers so that I don't get stuck in
		 * sbwait() after someone else has received my reply for me.
		 * Also necessary for connection based protocols to avoid
		 * race conditions during a reconnect.
		 */
		if (error = nfs_rcvlock(myrep))
			return (error);
		/* Already received, bye bye */
		if (myrep->r_mrep != NULL) {
			nfs_rcvunlock(&nmp->nm_flag);
			return (0);
		}
		/*
		 * Get the next Rpc reply off the socket
		 */
		error = nfs_receive(myrep, &nam, &mrep);
		nfs_rcvunlock(&nmp->nm_flag);
		if (error) printf("rcv err=%d\n", error);
		if (error) {

			/*
			 * Ignore routing errors on connectionless protocols??
			 */
			if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
				nmp->nm_so->so_error = 0;
				continue;
			}
			return (error);
		}
		if (nam)
			m_freem(nam);

		/*
		 * Get the xid and check that it is an rpc reply
		 */
		md = mrep;
		dpos = mtod(md, caddr_t);
		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
		rxid = *tl++;
		if (*tl != rpc_reply) {
			if (nmp->nm_flag & NFSMNT_NQNFS) {
				if (nqnfs_callback(nmp, mrep, md, dpos))
					nfsstats.rpcinvalid++;
			} else {
				nfsstats.rpcinvalid++;
				m_freem(mrep);
			}
nfsmout:
			continue;
		}

		/*
		 * Loop through the request list to match up the reply
		 * Iff no match, just drop the datagram
		 */
		rep = nfsreqh.r_next;
		while (rep != &nfsreqh) {
			if (rep->r_mrep == NULL && rxid == rep->r_xid) {
				/* Found it.. */
				rep->r_mrep = mrep;
				rep->r_md = md;
				rep->r_dpos = dpos;
				if (nfsrtton) {
					struct rttl *rt;

					rt = &nfsrtt.rttl[nfsrtt.pos];
					rt->proc = rep->r_procnum;
					rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
					rt->sent = nmp->nm_sent;
					rt->cwnd = nmp->nm_cwnd;
					rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
					rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
					rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
					rt->tstamp = time;
					if (rep->r_flags & R_TIMING)
						rt->rtt = rep->r_rtt;
					else
						rt->rtt = 1000000;
					nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
				}
				/*
				 * Update congestion window.
				 * Do the additive increase of
				 * one rpc/rtt.
				 */
				if (nmp->nm_cwnd <= nmp->nm_sent) {
					nmp->nm_cwnd +=
					    (NFS_CWNDSCALE * NFS_CWNDSCALE +
					    (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
					if (nmp->nm_cwnd > NFS_MAXCWND)
						nmp->nm_cwnd = NFS_MAXCWND;
				}
				nmp->nm_sent -= NFS_CWNDSCALE;
				/*
				 * Update rtt using a gain of 0.125 on the mean
				 * and a gain of 0.25 on the deviation.
				 */
				if (rep->r_flags & R_TIMING) {
					/*
					 * Since the timer resolution of
					 * NFS_HZ is so coarse, it can often
					 * result in r_rtt == 0. Since
					 * r_rtt == N means that the actual
					 * rtt is between N+dt and N+2-dt ticks,
					 * add 1.
					 */
					t1 = rep->r_rtt + 1;
					t1 -= (NFS_SRTT(rep) >> 3);
					NFS_SRTT(rep) += t1;
					if (t1 < 0)
						t1 = -t1;
					t1 -= (NFS_SDRTT(rep) >> 2);
					NFS_SDRTT(rep) += t1;
				}
				nmp->nm_timeouts = 0;
				break;
			}
			rep = rep->r_next;
		}
		/*
		 * If not matched to a request, drop it.
		 * If it's mine, get out.
		 */
		if (rep == &nfsreqh) {
			nfsstats.rpcunexpected++;
			m_freem(mrep);
		} else if (rep == myrep) {
			if (rep->r_mrep == NULL)
				panic("nfsreply nil");
			return (0);
		}
	}
}

/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
	struct vnode *vp;
	struct mbuf *mrest;
	int procnum;
	struct proc *procp;
	struct ucred *cred;
	struct mbuf **mrp;
	struct mbuf **mdp;
	caddr_t *dposp;
{
	register struct mbuf *m, *mrep;
	register struct nfsreq *rep;
	register u_long *tl;
	register int i;
	struct nfsmount *nmp;
	struct mbuf *md, *mheadend;
	struct nfsreq *reph;
	struct nfsnode *tp, *np;
	time_t reqtime, waituntil;
	caddr_t dpos, cp2;
	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
	u_long xid;
	char *auth_str;

	nmp = VFSTONFS(vp->v_mount);
	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
	rep->r_nmp = nmp;
	rep->r_vp = vp;
	rep->r_procp = procp;
	rep->r_procnum = procnum;
	i = 0;
	m = mrest;
	while (m) {
		i += m->m_len;
		m = m->m_next;
	}
	mrest_len = i;

	/*
	 * Get the RPC header with authorization.
	 */
kerbauth:
	auth_str = (char *)0;
	if (nmp->nm_flag & NFSMNT_KERB) {
		if (failed_auth) {
			error = nfs_getauth(nmp, rep, cred, &auth_type,
				&auth_str, &auth_len);
			if (error) {
				free((caddr_t)rep, M_NFSREQ);
				m_freem(mrest);
				return (error);
			}
		} else {
			auth_type = RPCAUTH_UNIX;
			auth_len = 5 * NFSX_UNSIGNED;
		}
	} else {
		auth_type = RPCAUTH_UNIX;
		if (cred->cr_ngroups < 1)
			panic("nfsreq nogrps");
		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
			5 * NFSX_UNSIGNED;
	}
	m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
	    auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
	if (auth_str)
		free(auth_str, M_TEMP);

	/*
	 * For stream protocols, insert a Sun RPC Record Mark.
	 */
	if (nmp->nm_sotype == SOCK_STREAM) {
		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
		*mtod(m, u_long *) = htonl(0x80000000 |
			(m->m_pkthdr.len - NFSX_UNSIGNED));
	}
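	/*
	 * A note on the record mark (illustrative): it is one 32-bit word in
	 * network byte order whose high bit flags the last fragment of the
	 * record and whose low 31 bits give the fragment length; the
	 * receiving code recovers the length with ntohl(len) & ~0x80000000
	 * in nfs_receive() and nfsrv_getstream().
	 */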
	rep->r_mreq = m;
	rep->r_xid = xid;
tryagain:
	if (nmp->nm_flag & NFSMNT_SOFT)
		rep->r_retry = nmp->nm_retry;
	else
		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
	rep->r_rtt = rep->r_rexmit = 0;
	if (proct[procnum] > 0)
		rep->r_flags = R_TIMING;
	else
		rep->r_flags = 0;
	rep->r_mrep = NULL;

	/*
	 * Do the client side RPC.
	 */
	nfsstats.rpcrequests++;
	/*
	 * Chain request into list of outstanding requests. Be sure
	 * to put it LAST so timer finds oldest requests first.
	 */
	s = splsoftclock();
	reph = &nfsreqh;
	reph->r_prev->r_next = rep;
	rep->r_prev = reph->r_prev;
	reph->r_prev = rep;
	rep->r_next = reph;

	/* Get send time for nqnfs */
	reqtime = time.tv_sec;

	/*
	 * If backing off another request or avoiding congestion, don't
	 * send this one now but let timer do it. If not timing a request,
	 * do it now.
	 */
	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
	    (nmp->nm_flag & NFSMNT_DUMBTIMR) ||
	    nmp->nm_sent < nmp->nm_cwnd)) {
		splx(s);
		if (nmp->nm_soflags & PR_CONNREQUIRED)
			error = nfs_sndlock(&nmp->nm_flag, rep);
		if (!error) {
			m = m_copym(m, 0, M_COPYALL, M_WAIT);
			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
			if (nmp->nm_soflags & PR_CONNREQUIRED)
				nfs_sndunlock(&nmp->nm_flag);
		}
		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
			nmp->nm_sent += NFS_CWNDSCALE;
			rep->r_flags |= R_SENT;
		}
	} else {
		splx(s);
		rep->r_rtt = -1;
	}

	/*
	 * Wait for the reply from our send or the timer's.
	 */
	if (!error)
		error = nfs_reply(rep);

	/*
	 * RPC done, unlink the request.
	 */
	s = splsoftclock();
	rep->r_prev->r_next = rep->r_next;
	rep->r_next->r_prev = rep->r_prev;
	splx(s);

	/*
	 * If there was a successful reply and a tprintf msg.
	 * tprintf a response.
	 */
	if (!error && (rep->r_flags & R_TPRINTFMSG))
		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
		    "is alive again");
	mrep = rep->r_mrep;
	md = rep->r_md;
	dpos = rep->r_dpos;
	if (error) {
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * break down the rpc header and check if ok
	 */
	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
	if (*tl++ == rpc_msgdenied) {
		if (*tl == rpc_mismatch)
			error = EOPNOTSUPP;
		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
			if (*tl == rpc_rejectedcred && failed_auth == 0) {
				failed_auth++;
				mheadend->m_next = (struct mbuf *)0;
				m_freem(mrep);
				m_freem(rep->r_mreq);
				goto kerbauth;
			} else
				error = EAUTH;
		} else
			error = EACCES;
		m_freem(mrep);
		m_freem(rep->r_mreq);
		free((caddr_t)rep, M_NFSREQ);
		return (error);
	}

	/*
	 * skip over the auth_verf, someday we may want to cache auth_short's
	 * for nfs_reqhead(), but for now just dump it
	 */
	if (*++tl != 0) {
		i = nfsm_rndup(fxdr_unsigned(long, *tl));
		nfsm_adv(i);
	}
	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
	/* 0 == ok */
	if (*tl == 0) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		if (*tl != 0) {
			error = fxdr_unsigned(int, *tl);
			m_freem(mrep);
			if ((nmp->nm_flag & NFSMNT_NQNFS) &&
			    error == NQNFS_TRYLATER) {
				error = 0;
				waituntil = time.tv_sec + trylater_delay;
				while (time.tv_sec < waituntil)
					(void) tsleep((caddr_t)&lbolt,
						PSOCK, "nqnfstry", 0);
				trylater_delay *= nfs_backoff[trylater_cnt];
				if (trylater_cnt < 7)
					trylater_cnt++;
				goto tryagain;
			}
			m_freem(rep->r_mreq);
			free((caddr_t)rep, M_NFSREQ);
			return (error);
		}
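		/*
		 * Rough sketch of the NQNFS_TRYLATER backoff above: the wait
		 * starts at NQ_TRYLATERDEL seconds and is stretched by the
		 * nfs_backoff[] factors (2, 4, 8, ...) on each successive
		 * "try later" reply, capped at index 7, before the rpc is
		 * retried via the tryagain label.
		 */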

		/*
		 * For nqnfs, get any lease in reply
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			if (*tl) {
				np = VTONFS(vp);
				nqlflag = fxdr_unsigned(int, *tl);
				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
				cachable = fxdr_unsigned(int, *tl++);
				reqtime += fxdr_unsigned(int, *tl++);
				if (reqtime > time.tv_sec) {
					if (np->n_tnext) {
						if (np->n_tnext == (struct nfsnode *)nmp)
							nmp->nm_tprev = np->n_tprev;
						else
							np->n_tnext->n_tprev = np->n_tprev;
						if (np->n_tprev == (struct nfsnode *)nmp)
							nmp->nm_tnext = np->n_tnext;
						else
							np->n_tprev->n_tnext = np->n_tnext;
						if (nqlflag == NQL_WRITE)
							np->n_flag |= NQNFSWRITE;
					} else if (nqlflag == NQL_READ)
						np->n_flag &= ~NQNFSWRITE;
					else
						np->n_flag |= NQNFSWRITE;
					if (cachable)
						np->n_flag &= ~NQNFSNONCACHE;
					else
						np->n_flag |= NQNFSNONCACHE;
					np->n_expiry = reqtime;
					fxdr_hyper(tl, &np->n_lrev);
					tp = nmp->nm_tprev;
					while (tp != (struct nfsnode *)nmp &&
					       tp->n_expiry > np->n_expiry)
						tp = tp->n_tprev;
					if (tp == (struct nfsnode *)nmp) {
						np->n_tnext = nmp->nm_tnext;
						nmp->nm_tnext = np;
					} else {
						np->n_tnext = tp->n_tnext;
						tp->n_tnext = np;
					}
					np->n_tprev = tp;
					if (np->n_tnext == (struct nfsnode *)nmp)
						nmp->nm_tprev = np;
					else
						np->n_tnext->n_tprev = np;
				}
			}
		}
		*mrp = mrep;
		*mdp = md;
		*dposp = dpos;
		m_freem(rep->r_mreq);
		FREE((caddr_t)rep, M_NFSREQ);
		return (0);
	}
	m_freem(mrep);
	m_freem(rep->r_mreq);
	free((caddr_t)rep, M_NFSREQ);
	error = EPROTONOSUPPORT;
nfsmout:
	return (error);
}

/*
 * Generate the rpc reply header
 * siz arg. is used to decide if adding a cluster is worthwhile
 */
nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
	int siz;
	struct nfsd *nd;
	int err;
	int cache;
	u_quad_t *frev;
	struct mbuf **mrq;
	struct mbuf **mbp;
	caddr_t *bposp;
{
	register u_long *tl;
	register struct mbuf *mreq;
	caddr_t bpos;
	struct mbuf *mb, *mb2;

	MGETHDR(mreq, M_WAIT, MT_DATA);
	mb = mreq;
	/*
	 * If this is a big reply, use a cluster else
	 * try and leave leading space for the lower level headers.
	 */
	siz += RPC_REPLYSIZ;
	if (siz >= MINCLSIZE) {
		MCLGET(mreq, M_WAIT);
	} else
		mreq->m_data += max_hdr;
	tl = mtod(mreq, u_long *);
	mreq->m_len = 6*NFSX_UNSIGNED;
	bpos = ((caddr_t)tl)+mreq->m_len;
	*tl++ = nd->nd_retxid;
	*tl++ = rpc_reply;
	if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
		*tl++ = rpc_msgdenied;
		if (err == NQNFS_AUTHERR) {
			*tl++ = rpc_autherr;
			*tl = rpc_rejectedcred;
			mreq->m_len -= NFSX_UNSIGNED;
			bpos -= NFSX_UNSIGNED;
		} else {
			*tl++ = rpc_mismatch;
			*tl++ = txdr_unsigned(2);
			*tl = txdr_unsigned(2);
		}
	} else {
		*tl++ = rpc_msgaccepted;
		*tl++ = 0;
		*tl++ = 0;
		switch (err) {
		case EPROGUNAVAIL:
			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
			break;
		case EPROGMISMATCH:
			*tl = txdr_unsigned(RPC_PROGMISMATCH);
			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(2);
			*tl = txdr_unsigned(2);	/* someday 3 */
			break;
		case EPROCUNAVAIL:
			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
			break;
		default:
			*tl = 0;
			if (err != VNOVAL) {
				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
				if (err)
					*tl = txdr_unsigned(nfsrv_errmap[err - 1]);
				else
					*tl = 0;
			}
			break;
		};
	}

	/*
	 * For nqnfs, piggyback lease as requested.
	 */
	if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
		if (nd->nd_nqlflag) {
			nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
			*tl++ = txdr_unsigned(nd->nd_nqlflag);
			*tl++ = txdr_unsigned(cache);
			*tl++ = txdr_unsigned(nd->nd_duration);
			txdr_hyper(frev, tl);
		} else {
			if (nd->nd_nqlflag != 0)
				panic("nqreph");
			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
			*tl = 0;
		}
	}
	*mrq = mreq;
	*mbp = mb;
	*bposp = bpos;
	if (err != 0 && err != VNOVAL)
		nfsstats.srvrpc_errs++;
	return (0);
}

/*
 * Nfs timer routine
 * Scan the nfsreq list and retransmit any requests that have timed out
 * To avoid retransmission attempts on STREAM sockets (in the future) make
 * sure to set the r_retry field to 0 (implies nm_retry == 0).
 */
nfs_timer()
{
	register struct nfsreq *rep;
	register struct mbuf *m;
	register struct socket *so;
	register struct nfsmount *nmp;
	register int timeo;
	static long lasttime = 0;
	int s, error;

	s = splnet();
	for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
		nmp = rep->r_nmp;
		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
			continue;
		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (rep->r_rtt >= 0) {
			rep->r_rtt++;
			if (nmp->nm_flag & NFSMNT_DUMBTIMR)
				timeo = nmp->nm_timeo;
			else
				timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
			if (nmp->nm_timeouts > 0)
				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
			if (rep->r_rtt <= timeo)
				continue;
			if (nmp->nm_timeouts < 8)
				nmp->nm_timeouts++;
		}
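		/*
		 * Sketch of the timeout arithmetic above: the base timeout is
		 * either nm_timeo (dumb timer) or the NFS_RTO() estimate, and
		 * after each timeout on the mount it is stretched by the
		 * nfs_backoff[] factors (2, 4, 8, ... 256) until a reply
		 * clears nm_timeouts in nfs_reply().
		 */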
		/*
		 * Check for server not responding
		 */
		if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
		    rep->r_rexmit > nmp->nm_deadthresh) {
			nfs_msg(rep->r_procp,
			    nmp->nm_mountp->mnt_stat.f_mntfromname,
			    "not responding");
			rep->r_flags |= R_TPRINTFMSG;
		}
		if (rep->r_rexmit >= rep->r_retry) {	/* too many */
			nfsstats.rpctimeouts++;
			rep->r_flags |= R_SOFTTERM;
			continue;
		}
		if (nmp->nm_sotype != SOCK_DGRAM) {
			if (++rep->r_rexmit > NFS_MAXREXMIT)
				rep->r_rexmit = NFS_MAXREXMIT;
			continue;
		}
		if ((so = nmp->nm_so) == NULL)
			continue;

		/*
		 * If there is enough space and the window allows..
		 *	Resend it
		 * Set r_rtt to -1 in case we fail to send it now.
		 */
		rep->r_rtt = -1;
		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
		    (rep->r_flags & R_SENT) ||
		    nmp->nm_sent < nmp->nm_cwnd) &&
		   (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
			if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
					(struct mbuf *)0, (struct mbuf *)0);
			else
				error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
					nmp->nm_nam, (struct mbuf *)0);
			if (error) {
				if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
					so->so_error = 0;
			} else {
				/*
				 * Iff first send, start timing
				 * else turn timing off, backoff timer
				 * and divide congestion window by 2.
				 */
				if (rep->r_flags & R_SENT) {
					rep->r_flags &= ~R_TIMING;
					if (++rep->r_rexmit > NFS_MAXREXMIT)
						rep->r_rexmit = NFS_MAXREXMIT;
					nmp->nm_cwnd >>= 1;
					if (nmp->nm_cwnd < NFS_CWNDSCALE)
						nmp->nm_cwnd = NFS_CWNDSCALE;
					nfsstats.rpcretries++;
				} else {
					rep->r_flags |= R_SENT;
					nmp->nm_sent += NFS_CWNDSCALE;
				}
				rep->r_rtt = 0;
			}
		}
	}

	/*
	 * Call the nqnfs server timer once a second to handle leases.
	 */
	if (lasttime != time.tv_sec) {
		lasttime = time.tv_sec;
		nqnfs_serverd();
	}
	splx(s);
	timeout(nfs_timer, (caddr_t)0, hz/NFS_HZ);
}

/*
 * Test for a termination condition pending on the process.
 * This is used for NFSMNT_INT mounts.
 */
nfs_sigintr(nmp, rep, p)
	struct nfsmount *nmp;
	struct nfsreq *rep;
	register struct proc *p;
{

	if (rep && (rep->r_flags & R_SOFTTERM))
		return (EINTR);
	if (!(nmp->nm_flag & NFSMNT_INT))
		return (0);
	if (p && p->p_sig && (((p->p_sig &~ p->p_sigmask) &~ p->p_sigignore) &
	    NFSINT_SIGMASK))
		return (EINTR);
	return (0);
}

/*
 * Lock a socket against others.
 * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
 * and also to avoid race conditions between the processes with nfs requests
 * in progress when a reconnect is necessary.
 */
nfs_sndlock(flagp, rep)
	register int *flagp;
	struct nfsreq *rep;
{
	struct proc *p;

	if (rep)
		p = rep->r_procp;
	else
		p = (struct proc *)0;
	while (*flagp & NFSMNT_SNDLOCK) {
		if (nfs_sigintr(rep->r_nmp, rep, p))
			return (EINTR);
		*flagp |= NFSMNT_WANTSND;
		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsndlck", 0);
	}
	*flagp |= NFSMNT_SNDLOCK;
	return (0);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_sndunlock(flagp)
	register int *flagp;
{

	if ((*flagp & NFSMNT_SNDLOCK) == 0)
		panic("nfs sndunlock");
	*flagp &= ~NFSMNT_SNDLOCK;
	if (*flagp & NFSMNT_WANTSND) {
		*flagp &= ~NFSMNT_WANTSND;
		wakeup((caddr_t)flagp);
	}
}

nfs_rcvlock(rep)
	register struct nfsreq *rep;
{
	register int *flagp = &rep->r_nmp->nm_flag;

	while (*flagp & NFSMNT_RCVLOCK) {
		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
			return (EINTR);
		*flagp |= NFSMNT_WANTRCV;
		(void) tsleep((caddr_t)flagp, PZERO-1, "nfsrcvlck", 0);
	}
	*flagp |= NFSMNT_RCVLOCK;
	return (0);
}

/*
 * Unlock the stream socket for others.
 */
void
nfs_rcvunlock(flagp)
	register int *flagp;
{

	if ((*flagp & NFSMNT_RCVLOCK) == 0)
		panic("nfs rcvunlock");
	*flagp &= ~NFSMNT_RCVLOCK;
	if (*flagp & NFSMNT_WANTRCV) {
		*flagp &= ~NFSMNT_WANTRCV;
		wakeup((caddr_t)flagp);
	}
}

/*
 * This function compares two net addresses by family and returns TRUE
 * if they are the same host.
 * If there is any doubt, return FALSE.
 * The AF_INET family is handled as a special case so that address mbufs
 * don't need to be saved to store "struct in_addr", which is only 4 bytes.
 */
nfs_netaddr_match(family, haddr, hmask, nam)
	int family;
	union nethostaddr *haddr;
	union nethostaddr *hmask;
	struct mbuf *nam;
{
	register struct sockaddr_in *inetaddr;
#ifdef ISO
	register struct sockaddr_iso *isoaddr1, *isoaddr2;
#endif


	switch (family) {
	case AF_INET:
		inetaddr = mtod(nam, struct sockaddr_in *);
		if (inetaddr->sin_family != AF_INET)
			return (0);
		if (hmask) {
			if ((inetaddr->sin_addr.s_addr & hmask->had_inetaddr) ==
			    (haddr->had_inetaddr & hmask->had_inetaddr))
				return (1);
		} else if (inetaddr->sin_addr.s_addr == haddr->had_inetaddr)
			return (1);
		break;
#ifdef ISO
	case AF_ISO:
		isoaddr1 = mtod(nam, struct sockaddr_iso *);
		if (isoaddr1->siso_family != AF_ISO)
			return (0);
		isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *);
		if (isoaddr1->siso_nlen > 0 &&
		    isoaddr1->siso_nlen == isoaddr2->siso_nlen &&
		    SAME_ISOADDR(isoaddr1, isoaddr2))
			return (1);
		break;
#endif	/* ISO */
	default:
		break;
	};
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
hang_addrlist(mp, argp)
	struct mount *mp;
	struct ufs_args *argp;
{
	register struct netaddrhash *np, **hnp;
	register int i;
	struct ufsmount *ump;
	struct sockaddr *saddr;
	struct mbuf *nam, *msk = (struct mbuf *)0;
	union nethostaddr netmsk;
	int error;

	if (error = sockargs(&nam, (caddr_t)argp->saddr, argp->slen,
	    MT_SONAME))
		return (error);
	saddr = mtod(nam, struct sockaddr *);
	ump = VFSTOUFS(mp);
	if (saddr->sa_family == AF_INET &&
	    ((struct sockaddr_in *)saddr)->sin_addr.s_addr == INADDR_ANY) {
		m_freem(nam);
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &ump->um_defexported;
		np->neth_exflags = argp->exflags;
		np->neth_anon = argp->anon;
		np->neth_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	if (argp->msklen > 0) {
		if (error = sockargs(&msk, (caddr_t)argp->smask, argp->msklen,
		    MT_SONAME)) {
			m_freem(nam);
			return (error);
		}

		/*
		 * Scan all the hash lists to check against duplications.
		 * For the net list, try both masks to catch a subnet
		 * of another network.
		 */
		hnp = &ump->um_netaddr[NETMASK_HASH];
		np = *hnp;
		if (saddr->sa_family == AF_INET)
			netmsk.had_inetaddr =
			    mtod(msk, struct sockaddr_in *)->sin_addr.s_addr;
		else
			netmsk.had_nam = msk;
		while (np) {
			if (nfs_netaddr_match(np->neth_family, &np->neth_haddr,
			    &np->neth_hmask, nam) ||
			    nfs_netaddr_match(np->neth_family, &np->neth_haddr,
			    &netmsk, nam)) {
				m_freem(nam);
				m_freem(msk);
				return (EPERM);
			}
			np = np->neth_next;
		}
		for (i = 0; i < NETHASHSZ; i++) {
			np = ump->um_netaddr[i];
			while (np) {
				if (nfs_netaddr_match(np->neth_family, &np->neth_haddr,
				    &netmsk, nam)) {
					m_freem(nam);
					m_freem(msk);
					return (EPERM);
				}
				np = np->neth_next;
			}
		}
	} else {
		hnp = &ump->um_netaddr[NETADDRHASH(saddr)];
		np = ump->um_netaddr[NETMASK_HASH];
		while (np) {
			if (nfs_netaddr_match(np->neth_family, &np->neth_haddr,
			    &np->neth_hmask, nam)) {
				m_freem(nam);
				return (EPERM);
			}
			np = np->neth_next;
		}
		np = *hnp;
		while (np) {
			if (nfs_netaddr_match(np->neth_family, &np->neth_haddr,
			    (union nethostaddr *)0, nam)) {
				m_freem(nam);
				return (EPERM);
			}
			np = np->neth_next;
		}
	}
	np = (struct netaddrhash *) malloc(sizeof(struct netaddrhash), M_NETADDR,
	    M_WAITOK);
	np->neth_family = saddr->sa_family;
	if (saddr->sa_family == AF_INET) {
		np->neth_inetaddr = ((struct sockaddr_in *)saddr)->sin_addr.s_addr;
		m_freem(nam);
		if (msk) {
			np->neth_inetmask = netmsk.had_inetaddr;
			m_freem(msk);
			if (np->neth_inetaddr &~ np->neth_inetmask)
				return (EPERM);
		} else
			np->neth_inetmask = 0xffffffff;
	} else {
		np->neth_nam = nam;
		np->neth_msk = msk;
	}
	np->neth_exflags = argp->exflags;
	np->neth_anon = argp->anon;
	np->neth_anon.cr_ref = 1;
	np->neth_next = *hnp;
	*hnp = np;
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
free_addrlist(ump)
	struct ufsmount *ump;
{
	register struct netaddrhash *np, *onp;
	register int i;

	for (i = 0; i <= NETHASHSZ; i++) {
		np = ump->um_netaddr[i];
		ump->um_netaddr[i] = (struct netaddrhash *)0;
		while (np) {
			onp = np;
			np = np->neth_next;
			if (onp->neth_family != AF_INET) {
				m_freem(onp->neth_nam);
				m_freem(onp->neth_msk);
			}
			free((caddr_t)onp, M_NETADDR);
		}
	}
}

/*
 * Generate a hash code for an iso host address. Used by NETADDRHASH() for
 * iso addresses.
 */
iso_addrhash(saddr)
	struct sockaddr *saddr;
{
#ifdef ISO
	register struct sockaddr_iso *siso;
	register int i, sum;

	siso = (struct sockaddr_iso *)saddr;
	sum = 0;
	for (i = 0; i < siso->siso_nlen; i++)
		sum += siso->siso_data[i];
	return (sum & (NETHASHSZ - 1));
#else
	return (0);
#endif	/* ISO */
}
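
/*
 * Informal note: XDR data is laid out in 4-byte units and the nfsm_dissect()
 * style macros walk it through u_long pointers, so mbufs whose length or
 * data pointer is not longword aligned could fault on strict-alignment
 * machines; nfs_realign() below copies such chains into aligned mbufs first.
 */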
/*
 * Check for badly aligned mbuf data areas and
 * realign data in an mbuf list by copying the data areas up, as required.
 */
void
nfs_realign(m, hsiz)
	register struct mbuf *m;
	int hsiz;
{
	register struct mbuf *m2;
	register int siz, mlen, olen;
	register caddr_t tcp, fcp;
	struct mbuf *mnew;

	while (m) {
		/*
		 * This never happens for UDP, rarely happens for TCP
		 * but frequently happens for iso transport.
		 */
		if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
			olen = m->m_len;
			fcp = mtod(m, caddr_t);
			m->m_flags &= ~M_PKTHDR;
			if (m->m_flags & M_EXT)
				m->m_data = m->m_ext.ext_buf;
			else
				m->m_data = m->m_dat;
			m->m_len = 0;
			tcp = mtod(m, caddr_t);
			mnew = m;
			m2 = m->m_next;

			/*
			 * If possible, only put the first invariant part
			 * of the RPC header in the first mbuf.
			 */
			if (olen <= hsiz)
				mlen = hsiz;
			else
				mlen = M_TRAILINGSPACE(m);

			/*
			 * Loop through the mbuf list consolidating data.
			 */
			while (m) {
				while (olen > 0) {
					if (mlen == 0) {
						m2->m_flags &= ~M_PKTHDR;
						if (m2->m_flags & M_EXT)
							m2->m_data = m2->m_ext.ext_buf;
						else
							m2->m_data = m2->m_dat;
						m2->m_len = 0;
						mlen = M_TRAILINGSPACE(m2);
						tcp = mtod(m2, caddr_t);
						mnew = m2;
						m2 = m2->m_next;
					}
					siz = MIN(mlen, olen);
					if (tcp != fcp)
						bcopy(fcp, tcp, siz);
					mnew->m_len += siz;
					mlen -= siz;
					olen -= siz;
					tcp += siz;
					fcp += siz;
				}
				m = m->m_next;
				if (m) {
					olen = m->m_len;
					fcp = mtod(m, caddr_t);
				}
			}

			/*
			 * Finally, set m_len == 0 for any trailing mbufs that have
			 * been copied out of.
			 */
			while (m2) {
				m2->m_len = 0;
				m2 = m2->m_next;
			}
			return;
		}
		m = m->m_next;
	}
}

/*
 * Socket upcall routine for the nfsd sockets.
 * The caddr_t arg is a pointer to the "struct nfssvc_sock".
 * Essentially do as much as possible non-blocking, else punt and it will
 * be called with M_WAIT from an nfsd.
 */
void
nfsrv_rcv(so, arg, waitflag)
	struct socket *so;
	caddr_t arg;
	int waitflag;
{
	register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
	register struct mbuf *m;
	struct mbuf *mp, *nam;
	struct uio auio;
	int flags, error;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
#ifdef notdef
	/*
	 * Define this to test for nfsds handling this under heavy load.
	 */
	if (waitflag == M_DONTWAIT) {
		slp->ns_flag |= SLP_NEEDQ; goto dorecs;
	}
#endif
	auio.uio_procp = NULL;
	if (so->so_type == SOCK_STREAM) {
		/*
		 * If there are already records on the queue, defer soreceive()
		 * to an nfsd so that there is feedback to the TCP layer that
		 * the nfs servers are heavily loaded.
		 */
		if (slp->ns_rec && waitflag == M_DONTWAIT) {
			slp->ns_flag |= SLP_NEEDQ;
			goto dorecs;
		}

		/*
		 * Do soreceive().
		 */
		auio.uio_resid = 1000000000;
		flags = MSG_DONTWAIT;
		error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
		if (error || mp == (struct mbuf *)0) {
			if (error == EWOULDBLOCK)
				slp->ns_flag |= SLP_NEEDQ;
			else
				slp->ns_flag |= SLP_DISCONN;
			goto dorecs;
		}
		m = mp;
		if (slp->ns_rawend) {
			slp->ns_rawend->m_next = m;
			slp->ns_cc += 1000000000 - auio.uio_resid;
		} else {
			slp->ns_raw = m;
			slp->ns_cc = 1000000000 - auio.uio_resid;
		}
		while (m->m_next)
			m = m->m_next;
		slp->ns_rawend = m;

		/*
		 * Now try and parse record(s) out of the raw stream data.
		 */
		if (error = nfsrv_getstream(slp, waitflag)) {
			if (error == EPERM)
				slp->ns_flag |= SLP_DISCONN;
			else
				slp->ns_flag |= SLP_NEEDQ;
		}
	} else {
		do {
			auio.uio_resid = 1000000000;
			flags = MSG_DONTWAIT;
			error = soreceive(so, &nam, &auio, &mp,
				(struct mbuf **)0, &flags);
			if (mp) {
				nfs_realign(mp, 10 * NFSX_UNSIGNED);
				if (nam) {
					m = nam;
					m->m_next = mp;
				} else
					m = mp;
				if (slp->ns_recend)
					slp->ns_recend->m_nextpkt = m;
				else
					slp->ns_rec = m;
				slp->ns_recend = m;
				m->m_nextpkt = (struct mbuf *)0;
			}
			if (error) {
				if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
				    && error != EWOULDBLOCK) {
					slp->ns_flag |= SLP_DISCONN;
					goto dorecs;
				}
			}
		} while (mp);
	}

	/*
	 * Now try and process the request records, non-blocking.
	 */
dorecs:
	if (waitflag == M_DONTWAIT &&
	    (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
		nfsrv_wakenfsd(slp);
}

/*
 * Try and extract an RPC request from the mbuf data list received on a
 * stream socket. The "waitflag" argument indicates whether or not it
 * can sleep.
 */
nfsrv_getstream(slp, waitflag)
	register struct nfssvc_sock *slp;
	int waitflag;
{
	register struct mbuf *m;
	register char *cp1, *cp2;
	register int len;
	struct mbuf *om, *m2, *recm;
	u_long recmark;

	if (slp->ns_flag & SLP_GETSTREAM)
		panic("nfs getstream");
	slp->ns_flag |= SLP_GETSTREAM;
	for (;;) {
		if (slp->ns_reclen == 0) {
			if (slp->ns_cc < NFSX_UNSIGNED) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (0);
			}
			m = slp->ns_raw;
			if (m->m_len >= NFSX_UNSIGNED) {
				bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
				m->m_data += NFSX_UNSIGNED;
				m->m_len -= NFSX_UNSIGNED;
			} else {
				cp1 = (caddr_t)&recmark;
				cp2 = mtod(m, caddr_t);
				while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
					while (m->m_len == 0) {
						m = m->m_next;
						cp2 = mtod(m, caddr_t);
					}
					*cp1++ = *cp2++;
					m->m_data++;
					m->m_len--;
				}
			}
			slp->ns_cc -= NFSX_UNSIGNED;
			slp->ns_reclen = ntohl(recmark) & ~0x80000000;
			if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
				slp->ns_flag &= ~SLP_GETSTREAM;
				return (EPERM);
			}
		}

		/*
		 * Now get the record part.
		 */
		if (slp->ns_cc == slp->ns_reclen) {
			recm = slp->ns_raw;
			slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
			slp->ns_cc = slp->ns_reclen = 0;
		} else if (slp->ns_cc > slp->ns_reclen) {
			len = 0;
			m = slp->ns_raw;
			om = (struct mbuf *)0;
			while (len < slp->ns_reclen) {
				if ((len + m->m_len) > slp->ns_reclen) {
					m2 = m_copym(m, 0, slp->ns_reclen - len,
						waitflag);
					if (m2) {
						if (om) {
							om->m_next = m2;
							recm = slp->ns_raw;
						} else
							recm = m2;
						m->m_data += slp->ns_reclen - len;
						m->m_len -= slp->ns_reclen - len;
						len = slp->ns_reclen;
					} else {
						slp->ns_flag &= ~SLP_GETSTREAM;
						return (EWOULDBLOCK);
					}
				} else if ((len + m->m_len) == slp->ns_reclen) {
					om = m;
					len += m->m_len;
					m = m->m_next;
					recm = slp->ns_raw;
					om->m_next = (struct mbuf *)0;
				} else {
					om = m;
					len += m->m_len;
					m = m->m_next;
				}
			}
			slp->ns_raw = m;
			slp->ns_cc -= len;
			slp->ns_reclen = 0;
		} else {
			slp->ns_flag &= ~SLP_GETSTREAM;
			return (0);
		}
		nfs_realign(recm, 10 * NFSX_UNSIGNED);
		if (slp->ns_recend)
			slp->ns_recend->m_nextpkt = recm;
		else
			slp->ns_rec = recm;
		slp->ns_recend = recm;
	}
}

/*
 * Parse an RPC header.
 */
nfsrv_dorec(slp, nd)
	register struct nfssvc_sock *slp;
	register struct nfsd *nd;
{
	register struct mbuf *m;
	int error;

	if ((slp->ns_flag & SLP_VALID) == 0 ||
	    (m = slp->ns_rec) == (struct mbuf *)0)
		return (ENOBUFS);
	if (slp->ns_rec = m->m_nextpkt)
		m->m_nextpkt = (struct mbuf *)0;
	else
		slp->ns_recend = (struct mbuf *)0;
	if (m->m_type == MT_SONAME) {
		nd->nd_nam = m;
		nd->nd_md = nd->nd_mrep = m->m_next;
		m->m_next = (struct mbuf *)0;
	} else {
		nd->nd_nam = (struct mbuf *)0;
		nd->nd_md = nd->nd_mrep = m;
	}
	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
	if (error = nfs_getreq(nd, TRUE)) {
		m_freem(nd->nd_nam);
		return (error);
	}
	return (0);
}

/*
 * Parse an RPC request
 * - verify it
 * - fill in the cred struct.
 */
nfs_getreq(nd, has_header)
	register struct nfsd *nd;
	int has_header;
{
	register int len, i;
	register u_long *tl;
	register long t1;
	struct uio uio;
	struct iovec iov;
	caddr_t dpos, cp2;
	u_long nfsvers, auth_type;
	int error = 0, nqnfs = 0;
	struct mbuf *mrep, *md;

	mrep = nd->nd_mrep;
	md = nd->nd_md;
	dpos = nd->nd_dpos;
	if (has_header) {
		nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED);
		nd->nd_retxid = *tl++;
		if (*tl++ != rpc_call) {
			m_freem(mrep);
			return (EBADRPC);
		}
	} else {
		nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);
	}
	nd->nd_repstat = 0;
	if (*tl++ != rpc_vers) {
		nd->nd_repstat = ERPCMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nfsvers = nfs_vers;
	if (*tl != nfs_prog) {
		if (*tl == nqnfs_prog) {
			nqnfs++;
			nfsvers = nqnfs_vers;
		} else {
			nd->nd_repstat = EPROGUNAVAIL;
			nd->nd_procnum = NFSPROC_NOOP;
			return (0);
		}
	}
	tl++;
	if (*tl++ != nfsvers) {
		nd->nd_repstat = EPROGMISMATCH;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
	if (nd->nd_procnum == NFSPROC_NULL)
		return (0);
	if (nd->nd_procnum >= NFS_NPROCS ||
	    (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
	    (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) {
		nd->nd_repstat = EPROCUNAVAIL;
		nd->nd_procnum = NFSPROC_NOOP;
		return (0);
	}
	auth_type = *tl++;
	len = fxdr_unsigned(int, *tl++);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}

	/*
	 * Handle auth_unix or auth_kerb.
	 */
	if (auth_type == rpc_auth_unix) {
		len = fxdr_unsigned(int, *++tl);
		if (len < 0 || len > NFS_MAXNAMLEN) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_adv(nfsm_rndup(len));
		nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
		len = fxdr_unsigned(int, *tl);
		if (len < 0 || len > RPCAUTH_UNIXGIDS) {
			m_freem(mrep);
			return (EBADRPC);
		}
		nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
		for (i = 1; i <= len; i++)
			if (i < NGROUPS)
				nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
			else
				tl++;
		nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
	} else if (auth_type == rpc_auth_kerb) {
		nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
		nd->nd_authlen = fxdr_unsigned(int, *tl);
		iov.iov_len = uio.uio_resid = nfsm_rndup(nd->nd_authlen);
		if (uio.uio_resid > (len - 2*NFSX_UNSIGNED)) {
			m_freem(mrep);
			return (EBADRPC);
		}
		uio.uio_offset = 0;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_segflg = UIO_SYSSPACE;
		iov.iov_base = (caddr_t)nd->nd_authstr;
		nfsm_mtouio(&uio, uio.uio_resid);
		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
		nd->nd_flag |= NFSD_NEEDAUTH;
	}

	/*
	 * Do we have any use for the verifier.
	 * According to the "Remote Procedure Call Protocol Spec." it
	 * should be AUTH_NULL, but some clients make it AUTH_UNIX?
	 * For now, just skip over it
	 */
	len = fxdr_unsigned(int, *++tl);
	if (len < 0 || len > RPCAUTH_MAXSIZ) {
		m_freem(mrep);
		return (EBADRPC);
	}
	if (len > 0) {
		nfsm_adv(nfsm_rndup(len));
	}

	/*
	 * For nqnfs, get piggybacked lease request.
	 */
	if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
		nd->nd_nqlflag = fxdr_unsigned(int, *tl);
		if (nd->nd_nqlflag) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			nd->nd_duration = fxdr_unsigned(int, *tl);
		} else
			nd->nd_duration = NQ_MINLEASE;
	} else {
		nd->nd_nqlflag = NQL_NOVAL;
		nd->nd_duration = NQ_MINLEASE;
	}
	nd->nd_md = md;
	nd->nd_dpos = dpos;
	return (0);
nfsmout:
	return (error);
}

/*
 * Search for a sleeping nfsd and wake it up.
 * SIDE EFFECT: If none found, set NFSD_CHECKSLP flag, so that one of the
 * running nfsds will go look for the work in the nfssvc_sock list.
 */
void
nfsrv_wakenfsd(slp)
	struct nfssvc_sock *slp;
{
	register struct nfsd *nd = nfsd_head.nd_next;

	if ((slp->ns_flag & SLP_VALID) == 0)
		return;
	while (nd != (struct nfsd *)&nfsd_head) {
		if (nd->nd_flag & NFSD_WAITING) {
			nd->nd_flag &= ~NFSD_WAITING;
			if (nd->nd_slp)
				panic("nfsd wakeup");
			slp->ns_sref++;
			nd->nd_slp = slp;
			wakeup((caddr_t)nd);
			return;
		}
		nd = nd->nd_next;
	}
	slp->ns_flag |= SLP_DOREC;
	nfsd_head.nd_flag |= NFSD_CHECKSLP;
}

nfs_msg(p, server, msg)
	struct proc *p;
	char *server, *msg;
{
	tpr_t tpr;

	if (p)
		tpr = tprintf_open(p);
	else
		tpr = NULL;
	tprintf(tpr, "nfs server %s: %s\n", server, msg);
	tprintf_close(tpr);
}