xref: /dragonfly/sys/vfs/nfs/nfs_syscalls.c (revision 36a3d1d6)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
37  * $FreeBSD: src/sys/nfs/nfs_syscalls.c,v 1.58.2.1 2000/11/26 02:30:06 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_syscalls.c,v 1.31 2008/01/05 14:02:41 swildner Exp $
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/sysproto.h>
44 #include <sys/kernel.h>
45 #include <sys/sysctl.h>
46 #include <sys/file.h>
47 #include <sys/filedesc.h>
48 #include <sys/vnode.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/proc.h>
52 #include <sys/priv.h>
53 #include <sys/buf.h>
54 #include <sys/mbuf.h>
55 #include <sys/resourcevar.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/domain.h>
59 #include <sys/protosw.h>
60 #include <sys/nlookup.h>
61 #include <sys/mutex.h>
62 #include <vm/vm_zone.h>
63 
64 #include <sys/mutex2.h>
65 #include <sys/mplock2.h>
66 
67 #include <netinet/in.h>
68 #include <netinet/tcp.h>
69 #include "xdr_subs.h"
70 #include "rpcv2.h"
71 #include "nfsproto.h"
72 #include "nfs.h"
73 #include "nfsm_subs.h"
74 #include "nfsrvcache.h"
75 #include "nfsmount.h"
76 #include "nfsnode.h"
77 #include "nfsrtt.h"
78 
79 #include <sys/thread2.h>
80 
81 static MALLOC_DEFINE(M_NFSSVC, "NFS srvsock", "Nfs server structure");
82 
83 static int nuidhash_max = NFS_MAXUIDHASH;
84 
85 #ifndef NFS_NOSERVER
86 static void	nfsrv_zapsock (struct nfssvc_sock *slp);
87 #endif
88 
89 #define	TRUE	1
90 #define	FALSE	0
91 
92 SYSCTL_DECL(_vfs_nfs);
93 
94 #ifndef NFS_NOSERVER
95 int nfsd_waiting = 0;
96 static struct nfsdrt nfsdrt;
97 static int nfs_numnfsd = 0;
98 static void	nfsd_rt (int sotype, struct nfsrv_descript *nd,
99 			     int cacherep);
100 static int	nfssvc_addsock (struct file *, struct sockaddr *,
101 				    struct thread *);
102 static int	nfssvc_nfsd (struct nfsd_srvargs *,caddr_t,struct thread *);
103 
104 static int nfs_privport = 0;
105 SYSCTL_INT(_vfs_nfs, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, &nfs_privport, 0, "");
106 SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay, CTLFLAG_RW, &nfsrvw_procrastinate, 0, "");
107 SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate_v3, 0, "");
108 int	nfs_soreserve = NFS_MAXPACKET * NFS_MAXASYNCBIO;
109 SYSCTL_INT(_vfs_nfs, OID_AUTO, soreserve, CTLFLAG_RW, &nfs_soreserve, 0, "");
110 
111 /*
112  * NFS server system calls
113  */
114 
115 #endif /* NFS_NOSERVER */
116 /*
117  * nfssvc_args(int flag, caddr_t argp)
118  *
119  * Nfs server psuedo system call for the nfsd's
120  * Based on the flag value it either:
121  * - adds a socket to the selection list
122  * - remains in the kernel as an nfsd
123  * - remains in the kernel as an nfsiod
124  *
125  * MPALMOSTSAFE
126  */
127 int
128 sys_nfssvc(struct nfssvc_args *uap)
129 {
130 #ifndef NFS_NOSERVER
131 	struct nlookupdata nd;
132 	struct file *fp;
133 	struct sockaddr *nam;
134 	struct nfsd_args nfsdarg;
135 	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
136 	struct nfsd_cargs ncd;
137 	struct nfsd *nfsd;
138 	struct nfssvc_sock *slp;
139 	struct nfsuid *nuidp;
140 	struct nfsmount *nmp;
141 	struct vnode *vp;
142 #endif /* NFS_NOSERVER */
143 	int error;
144 	struct thread *td = curthread;
145 
146 	/*
147 	 * Must be super user
148 	 */
149 	error = priv_check(td, PRIV_ROOT);
150 	if (error)
151 		return (error);
152 
153 	get_mplock();
154 	while (nfssvc_sockhead_flag & SLP_INIT) {
155 		nfssvc_sockhead_flag |= SLP_WANTINIT;
156 		tsleep((caddr_t)&nfssvc_sockhead, 0, "nfsd init", 0);
157 	}
158 	if (uap->flag & NFSSVC_BIOD)
159 		error = ENXIO;		/* no longer need nfsiod's */
160 #ifdef NFS_NOSERVER
161 	else
162 		error = ENXIO;
163 #else /* !NFS_NOSERVER */
164 	else if (uap->flag & NFSSVC_MNTD) {
165 		error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
166 		if (error)
167 			goto done;
168 		vp = NULL;
169 		error = nlookup_init(&nd, ncd.ncd_dirp, UIO_USERSPACE,
170 					NLC_FOLLOW);
171 		if (error == 0)
172 			error = nlookup(&nd);
173 		if (error == 0)
174 			error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
175 		nlookup_done(&nd);
176 		if (error)
177 			goto done;
178 
179 		if ((vp->v_flag & VROOT) == 0)
180 			error = EINVAL;
181 		nmp = VFSTONFS(vp->v_mount);
182 		vput(vp);
183 		if (error)
184 			goto done;
185 		if ((nmp->nm_state & NFSSTA_MNTD) &&
186 			(uap->flag & NFSSVC_GOTAUTH) == 0) {
187 			error = 0;
188 			goto done;
189 		}
190 		nmp->nm_state |= NFSSTA_MNTD;
191 		error = nfs_clientd(nmp, td->td_ucred, &ncd, uap->flag,
192 				    uap->argp, td);
193 	} else if (uap->flag & NFSSVC_ADDSOCK) {
194 		error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
195 		if (error)
196 			goto done;
197 		error = holdsock(td->td_proc->p_fd, nfsdarg.sock, &fp);
198 		if (error)
199 			goto done;
200 		/*
201 		 * Get the client address for connected sockets.
202 		 */
203 		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
204 			nam = NULL;
205 		else {
206 			error = getsockaddr(&nam, nfsdarg.name,
207 					    nfsdarg.namelen);
208 			if (error) {
209 				fdrop(fp);
210 				goto done;
211 			}
212 		}
213 		error = nfssvc_addsock(fp, nam, td);
214 		fdrop(fp);
215 	} else {
216 		error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
217 		if (error)
218 			goto done;
219 		if ((uap->flag & NFSSVC_AUTHIN) &&
220 		    ((nfsd = nsd->nsd_nfsd)) != NULL &&
221 		    (nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
222 			slp = nfsd->nfsd_slp;
223 
224 			/*
225 			 * First check to see if another nfsd has already
226 			 * added this credential.
227 			 */
228 			for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
229 			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
230 				if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid &&
231 				    (!nfsd->nfsd_nd->nd_nam2 ||
232 				     netaddr_match(NU_NETFAM(nuidp),
233 				     &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
234 					break;
235 			}
236 			if (nuidp) {
237 			    nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr);
238 			    nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
239 			} else {
240 			    /*
241 			     * Nope, so we will.
242 			     */
243 			    if (slp->ns_numuids < nuidhash_max) {
244 				slp->ns_numuids++;
245 				nuidp = (struct nfsuid *)
246 				   kmalloc(sizeof (struct nfsuid), M_NFSUID,
247 					M_WAITOK);
248 			    } else
249 				nuidp = NULL;
250 			    if ((slp->ns_flag & SLP_VALID) == 0) {
251 				if (nuidp)
252 				    kfree((caddr_t)nuidp, M_NFSUID);
253 			    } else {
254 				if (nuidp == NULL) {
255 				    nuidp = TAILQ_FIRST(&slp->ns_uidlruhead);
256 				    LIST_REMOVE(nuidp, nu_hash);
257 				    TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
258 					nu_lru);
259 				    if (nuidp->nu_flag & NU_NAM)
260 					FREE(nuidp->nu_nam, M_SONAME);
261 			        }
262 				nuidp->nu_flag = 0;
263 				nuidp->nu_cr = nsd->nsd_cr;
264 				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
265 				    nuidp->nu_cr.cr_ngroups = NGROUPS;
266 				nuidp->nu_cr.cr_ref = 1;
267 				nuidp->nu_timestamp = nsd->nsd_timestamp;
268 				nuidp->nu_expire = time_second + nsd->nsd_ttl;
269 				/*
270 				 * and save the session key in nu_key.
271 				 */
272 				bcopy(nsd->nsd_key, nuidp->nu_key,
273 				    sizeof (nsd->nsd_key));
274 				if (nfsd->nfsd_nd->nd_nam2) {
275 				    struct sockaddr_in *saddr;
276 
277 				    saddr = (struct sockaddr_in *)
278 					    nfsd->nfsd_nd->nd_nam2;
279 				    switch (saddr->sin_family) {
280 				    case AF_INET:
281 					nuidp->nu_flag |= NU_INETADDR;
282 					nuidp->nu_inetaddr =
283 					     saddr->sin_addr.s_addr;
284 					break;
285 				    case AF_ISO:
286 				    default:
287 					nuidp->nu_flag |= NU_NAM;
288 					nuidp->nu_nam =
289 					  dup_sockaddr(nfsd->nfsd_nd->nd_nam2);
290 					break;
291 				    };
292 				}
293 				TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
294 					nu_lru);
295 				LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
296 					nuidp, nu_hash);
297 				nfsrv_setcred(&nuidp->nu_cr,
298 				    &nfsd->nfsd_nd->nd_cr);
299 				nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
300 			    }
301 			}
302 		}
303 		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
304 			nfsd->nfsd_flag |= NFSD_AUTHFAIL;
305 		error = nfssvc_nfsd(nsd, uap->argp, td);
306 	}
307 #endif /* NFS_NOSERVER */
308 	if (error == EINTR || error == ERESTART)
309 		error = 0;
310 done:
311 	rel_mplock();
312 	return (error);
313 }
314 
315 #ifndef NFS_NOSERVER
316 /*
317  * Adds a socket to the list for servicing by nfsds.
318  */
319 static int
320 nfssvc_addsock(struct file *fp, struct sockaddr *mynam, struct thread *td)
321 {
322 	int siz;
323 	struct nfssvc_sock *slp;
324 	struct socket *so;
325 	int error;
326 
327 	so = (struct socket *)fp->f_data;
328 #if 0
329 	tslp = NULL;
330 	/*
331 	 * Add it to the list, as required.
332 	 */
333 	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
334 		tslp = nfs_udpsock;
335 		if (tslp->ns_flag & SLP_VALID) {
336 			if (mynam != NULL)
337 				FREE(mynam, M_SONAME);
338 			return (EPERM);
339 		}
340 	}
341 #endif
342 	/*
343 	 * Reserve buffer space in the socket.  Note that due to bugs in
344 	 * Linux's delayed-ack code, serious performance degredation may
345 	 * occur with linux hosts if the minimum is used.
346 	 *
347 	 * NFS sockets are not limited to the standard sb_max or by
348 	 * resource limits.
349 	 */
350 	if (so->so_type == SOCK_STREAM)
351 		siz = NFS_MAXPACKET + sizeof (u_long);
352 	else
353 		siz = NFS_MAXPACKET;
354 	if (siz < nfs_soreserve)
355 	    siz = nfs_soreserve;
356 
357 	error = soreserve(so, siz, siz, NULL);
358 	if (error) {
359 		if (mynam != NULL)
360 			FREE(mynam, M_SONAME);
361 		return (error);
362 	}
363 
364 	/*
365 	 * Set protocol specific options { for now TCP only } and
366 	 * reserve some space. For datagram sockets, this can get called
367 	 * repeatedly for the same socket, but that isn't harmful.
368 	 */
369 	if (so->so_type == SOCK_STREAM) {
370 		struct sockopt sopt;
371 		int val;
372 
373 		bzero(&sopt, sizeof sopt);
374 		sopt.sopt_level = SOL_SOCKET;
375 		sopt.sopt_name = SO_KEEPALIVE;
376 		sopt.sopt_val = &val;
377 		sopt.sopt_valsize = sizeof val;
378 		val = 1;
379 		sosetopt(so, &sopt);
380 	}
381 	if (so->so_proto->pr_domain->dom_family == AF_INET &&
382 	    so->so_proto->pr_protocol == IPPROTO_TCP) {
383 		struct sockopt sopt;
384 		int val;
385 
386 		bzero(&sopt, sizeof sopt);
387 		sopt.sopt_level = IPPROTO_TCP;
388 		sopt.sopt_name = TCP_NODELAY;
389 		sopt.sopt_val = &val;
390 		sopt.sopt_valsize = sizeof val;
391 		val = 1;
392 		sosetopt(so, &sopt);
393 	}
394 	atomic_clear_int(&so->so_rcv.ssb_flags, SSB_NOINTR);
395 	so->so_rcv.ssb_timeo = 0;
396 	atomic_clear_int(&so->so_snd.ssb_flags, SSB_NOINTR);
397 	so->so_snd.ssb_timeo = 0;
398 
399 	slp = (struct nfssvc_sock *)kmalloc(sizeof (struct nfssvc_sock),
400 	    M_NFSSVC, M_WAITOK | M_ZERO);
401 	mtx_init(&slp->ns_solock);
402 	STAILQ_INIT(&slp->ns_rec);
403 	TAILQ_INIT(&slp->ns_uidlruhead);
404 	TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
405 
406 	slp->ns_so = so;
407 	slp->ns_nam = mynam;
408 	fp->f_count++;
409 	slp->ns_fp = fp;
410 	crit_enter();
411 	so->so_upcallarg = (caddr_t)slp;
412 	so->so_upcall = nfsrv_rcv;
413 	atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
414 	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
415 	nfsrv_wakenfsd(slp, 1);
416 	crit_exit();
417 	return (0);
418 }
419 
420 /*
421  * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
422  * until it is killed by a signal.
423  */
424 static int
425 nfssvc_nfsd(struct nfsd_srvargs *nsd, caddr_t argp, struct thread *td)
426 {
427 	int siz;
428 	struct nfssvc_sock *slp;
429 	struct nfsd *nfsd = nsd->nsd_nfsd;
430 	struct nfsrv_descript *nd = NULL;
431 	struct mbuf *m, *mreq;
432 	int error = 0, cacherep, sotype, writes_todo;
433 	int procrastinate;
434 	u_quad_t cur_usec;
435 
436 #ifndef nolint
437 	cacherep = RC_DOIT;
438 	writes_todo = 0;
439 #endif
440 	if (nfsd == NULL) {
441 		nsd->nsd_nfsd = nfsd = (struct nfsd *)
442 			kmalloc(sizeof (struct nfsd), M_NFSD, M_WAITOK|M_ZERO);
443 		crit_enter();
444 		nfsd->nfsd_td = td;
445 		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
446 		nfs_numnfsd++;
447 	} else
448 		crit_enter();
449 
450 	/*
451 	 * Loop getting rpc requests until SIGKILL.
452 	 */
453 	for (;;) {
454 		if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
455 			while (nfsd->nfsd_slp == NULL &&
456 			    (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
457 				nfsd->nfsd_flag |= NFSD_WAITING;
458 				nfsd_waiting++;
459 				error = tsleep((caddr_t)nfsd, PCATCH, "nfsd", 0);
460 				nfsd_waiting--;
461 				if (error)
462 					goto done;
463 			}
464 			if (nfsd->nfsd_slp == NULL &&
465 			    (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
466 				TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
467 				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
468 					== (SLP_VALID | SLP_DOREC)) {
469 					    slp->ns_flag &= ~SLP_DOREC;
470 					    slp->ns_sref++;
471 					    nfsd->nfsd_slp = slp;
472 					    break;
473 				    }
474 				}
475 				if (slp == 0)
476 					nfsd_head_flag &= ~NFSD_CHECKSLP;
477 			}
478 			if ((slp = nfsd->nfsd_slp) == NULL)
479 				continue;
480 			if (slp->ns_flag & SLP_VALID) {
481 				if (slp->ns_flag & SLP_DISCONN)
482 					nfsrv_zapsock(slp);
483 				else if (slp->ns_flag & SLP_NEEDQ) {
484 					slp->ns_flag &= ~SLP_NEEDQ;
485 					(void) nfs_slplock(slp, 1);
486 					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
487 						MB_WAIT);
488 					nfs_slpunlock(slp);
489 				}
490 				error = nfsrv_dorec(slp, nfsd, &nd);
491 				cur_usec = nfs_curusec();
492 				if (error && slp->ns_tq.lh_first &&
493 				    slp->ns_tq.lh_first->nd_time <= cur_usec) {
494 					error = 0;
495 					cacherep = RC_DOIT;
496 					writes_todo = 1;
497 				} else
498 					writes_todo = 0;
499 				nfsd->nfsd_flag |= NFSD_REQINPROG;
500 			}
501 		} else {
502 			error = 0;
503 			slp = nfsd->nfsd_slp;
504 		}
505 		if (error || (slp->ns_flag & SLP_VALID) == 0) {
506 			if (nd) {
507 				kfree((caddr_t)nd, M_NFSRVDESC);
508 				nd = NULL;
509 			}
510 			nfsd->nfsd_slp = NULL;
511 			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
512 			nfsrv_slpderef(slp);
513 			continue;
514 		}
515 		crit_exit();
516 		sotype = slp->ns_so->so_type;
517 		if (nd) {
518 		    getmicrotime(&nd->nd_starttime);
519 		    if (nd->nd_nam2)
520 			nd->nd_nam = nd->nd_nam2;
521 		    else
522 			nd->nd_nam = slp->ns_nam;
523 
524 		    /*
525 		     * Check to see if authorization is needed.
526 		     */
527 		    if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
528 			nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
529 			nsd->nsd_haddr =
530 				((struct sockaddr_in *)
531 				 nd->nd_nam)->sin_addr.s_addr;
532 			nsd->nsd_authlen = nfsd->nfsd_authlen;
533 			nsd->nsd_verflen = nfsd->nfsd_verflen;
534 			if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr,
535 				nfsd->nfsd_authlen) &&
536 			    !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr,
537 				nfsd->nfsd_verflen) &&
538 			    !copyout((caddr_t)nsd, argp, sizeof (*nsd)))
539 			    return (ENEEDAUTH);
540 			cacherep = RC_DROPIT;
541 		    } else {
542 			cacherep = nfsrv_getcache(nd, slp, &mreq);
543 		    }
544 
545 		    if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
546 			nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
547 			nd->nd_procnum = NFSPROC_NOOP;
548 			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
549 			cacherep = RC_DOIT;
550 		    } else if (nfs_privport) {
551 			/* Check if source port is privileged */
552 			u_short port;
553 			struct sockaddr *nam = nd->nd_nam;
554 			struct sockaddr_in *sin;
555 
556 			sin = (struct sockaddr_in *)nam;
557 			port = ntohs(sin->sin_port);
558 			if (port >= IPPORT_RESERVED &&
559 			    nd->nd_procnum != NFSPROC_NULL) {
560 			    nd->nd_procnum = NFSPROC_NOOP;
561 			    nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
562 			    cacherep = RC_DOIT;
563 			    kprintf("NFS request from unprivileged port (%s:%d)\n",
564 				   inet_ntoa(sin->sin_addr), port);
565 			}
566 		    }
567 
568 		}
569 
570 		/*
571 		 * Loop to get all the write rpc relies that have been
572 		 * gathered together.
573 		 */
574 		do {
575 		    switch (cacherep) {
576 		    case RC_DOIT:
577 			if (nd && (nd->nd_flag & ND_NFSV3))
578 			    procrastinate = nfsrvw_procrastinate_v3;
579 			else
580 			    procrastinate = nfsrvw_procrastinate;
581 			if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE &&
582 			    procrastinate > 0)
583 			) {
584 			    error = nfsrv_writegather(&nd, slp,
585 				nfsd->nfsd_td, &mreq);
586 			} else {
587 			    error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
588 				slp, nfsd->nfsd_td, &mreq);
589 			}
590 			if (mreq == NULL)
591 				break;
592 			if (error != 0 && error != NFSERR_RETVOID) {
593 				if (nd->nd_procnum != NQNFSPROC_VACATED)
594 					nfsstats.srv_errs++;
595 				nfsrv_updatecache(nd, FALSE, mreq);
596 				if (nd->nd_nam2)
597 					FREE(nd->nd_nam2, M_SONAME);
598 				break;
599 			}
600 			nfsstats.srvrpccnt[nd->nd_procnum]++;
601 			nfsrv_updatecache(nd, TRUE, mreq);
602 			nd->nd_mrep = NULL;
603 		    case RC_REPLY:
604 			m = mreq;
605 			siz = 0;
606 			while (m) {
607 				siz += m->m_len;
608 				m = m->m_next;
609 			}
610 			if (siz <= 0 || siz > NFS_MAXPACKET) {
611 				kprintf("mbuf siz=%d\n",siz);
612 				panic("Bad nfs svc reply");
613 			}
614 			m = mreq;
615 			m->m_pkthdr.len = siz;
616 			m->m_pkthdr.rcvif = NULL;
617 			/*
618 			 * For stream protocols, prepend a Sun RPC
619 			 * Record Mark.
620 			 */
621 			if (sotype == SOCK_STREAM) {
622 				M_PREPEND(m, NFSX_UNSIGNED, MB_WAIT);
623 				if (m == NULL)
624 					return (ENOBUFS);
625 				*mtod(m, u_int32_t *) = htonl(0x80000000 | siz);
626 			}
627 			if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
628 				(void) nfs_slplock(slp, 1);
629 			if (slp->ns_flag & SLP_VALID)
630 			    error = nfs_send(slp->ns_so, nd->nd_nam2, m, NULL);
631 			else {
632 			    error = EPIPE;
633 			    m_freem(m);
634 			}
635 			if (nfsrtton)
636 				nfsd_rt(sotype, nd, cacherep);
637 			if (nd->nd_nam2)
638 				FREE(nd->nd_nam2, M_SONAME);
639 			if (nd->nd_mrep)
640 				m_freem(nd->nd_mrep);
641 			if (error == EPIPE)
642 				nfsrv_zapsock(slp);
643 			if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
644 				nfs_slpunlock(slp);
645 			if (error == EINTR || error == ERESTART) {
646 				kfree((caddr_t)nd, M_NFSRVDESC);
647 				nfsrv_slpderef(slp);
648 				crit_enter();
649 				goto done;
650 			}
651 			break;
652 		    case RC_DROPIT:
653 			if (nfsrtton)
654 				nfsd_rt(sotype, nd, cacherep);
655 			m_freem(nd->nd_mrep);
656 			if (nd->nd_nam2)
657 				FREE(nd->nd_nam2, M_SONAME);
658 			break;
659 		    };
660 		    if (nd) {
661 			FREE((caddr_t)nd, M_NFSRVDESC);
662 			nd = NULL;
663 		    }
664 
665 		    /*
666 		     * Check to see if there are outstanding writes that
667 		     * need to be serviced.
668 		     */
669 		    cur_usec = nfs_curusec();
670 		    crit_enter();
671 		    if (slp->ns_tq.lh_first &&
672 			slp->ns_tq.lh_first->nd_time <= cur_usec) {
673 			cacherep = RC_DOIT;
674 			writes_todo = 1;
675 		    } else
676 			writes_todo = 0;
677 		    crit_exit();
678 		} while (writes_todo);
679 		crit_enter();
680 		if (nfsrv_dorec(slp, nfsd, &nd)) {
681 			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
682 			nfsd->nfsd_slp = NULL;
683 			nfsrv_slpderef(slp);
684 		}
685 	}
686 done:
687 	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
688 	crit_exit();
689 	kfree((caddr_t)nfsd, M_NFSD);
690 	nsd->nsd_nfsd = NULL;
691 	if (--nfs_numnfsd == 0)
692 		nfsrv_init(TRUE);	/* Reinitialize everything */
693 	return (error);
694 }
695 
696 /*
697  * Shut down a socket associated with an nfssvc_sock structure.
698  * Should be called with the send lock set, if required.
699  * The trick here is to increment the sref at the start, so that the nfsds
700  * will stop using it and clear ns_flag at the end so that it will not be
701  * reassigned during cleanup.
702  */
703 static void
704 nfsrv_zapsock(struct nfssvc_sock *slp)
705 {
706 	struct nfsuid *nuidp, *nnuidp;
707 	struct nfsrv_descript *nwp, *nnwp;
708 	struct socket *so;
709 	struct file *fp;
710 	struct nfsrv_rec *rec;
711 
712 	slp->ns_flag &= ~SLP_ALLFLAGS;
713 	fp = slp->ns_fp;
714 	if (fp) {
715 		slp->ns_fp = NULL;
716 		so = slp->ns_so;
717 		atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
718 		so->so_upcall = NULL;
719 		so->so_upcallarg = NULL;
720 		soshutdown(so, SHUT_RDWR);
721 		closef(fp, NULL);
722 		if (slp->ns_nam)
723 			FREE(slp->ns_nam, M_SONAME);
724 		m_freem(slp->ns_raw);
725 		while ((rec = STAILQ_FIRST(&slp->ns_rec)) != NULL) {
726 			--slp->ns_numrec;
727 			STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
728 			if (rec->nr_address)
729 				FREE(rec->nr_address, M_SONAME);
730 			m_freem(rec->nr_packet);
731 			kfree(rec, M_NFSRVDESC);
732 		}
733 		TAILQ_FOREACH_MUTABLE(nuidp, &slp->ns_uidlruhead, nu_lru,
734 				      nnuidp) {
735 			LIST_REMOVE(nuidp, nu_hash);
736 			TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
737 			if (nuidp->nu_flag & NU_NAM)
738 				FREE(nuidp->nu_nam, M_SONAME);
739 			kfree((caddr_t)nuidp, M_NFSUID);
740 		}
741 		crit_enter();
742 		for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
743 			nnwp = nwp->nd_tq.le_next;
744 			LIST_REMOVE(nwp, nd_tq);
745 			kfree((caddr_t)nwp, M_NFSRVDESC);
746 		}
747 		LIST_INIT(&slp->ns_tq);
748 		crit_exit();
749 	}
750 }
751 
752 /*
753  * Derefence a server socket structure. If it has no more references and
754  * is no longer valid, you can throw it away.
755  */
756 void
757 nfsrv_slpderef(struct nfssvc_sock *slp)
758 {
759 	if (--(slp->ns_sref) == 0 && (slp->ns_flag & SLP_VALID) == 0) {
760 		TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
761 		kfree((caddr_t)slp, M_NFSSVC);
762 	}
763 }
764 
765 /*
766  * Lock a socket against others.
767  *
768  * Returns 0 on failure, 1 on success.
769  */
770 int
771 nfs_slplock(struct nfssvc_sock *slp, int wait)
772 {
773 	mtx_t mtx = &slp->ns_solock;
774 
775 	if (wait) {
776 		mtx_lock_ex(mtx, "nfsslplck", 0, 0);
777 		return(1);
778 	} else if (mtx_lock_ex_try(mtx) == 0) {
779 		return(1);
780 	} else {
781 		return(0);
782 	}
783 }
784 
785 /*
786  * Unlock the stream socket for others.
787  */
788 void
789 nfs_slpunlock(struct nfssvc_sock *slp)
790 {
791 	mtx_t mtx = &slp->ns_solock;
792 
793 	mtx_unlock(mtx);
794 }
795 
796 /*
797  * Initialize the data structures for the server.
798  * Handshake with any new nfsds starting up to avoid any chance of
799  * corruption.
800  */
801 void
802 nfsrv_init(int terminating)
803 {
804 	struct nfssvc_sock *slp, *nslp;
805 
806 	if (nfssvc_sockhead_flag & SLP_INIT)
807 		panic("nfsd init");
808 	nfssvc_sockhead_flag |= SLP_INIT;
809 	if (terminating) {
810 		TAILQ_FOREACH_MUTABLE(slp, &nfssvc_sockhead, ns_chain, nslp) {
811 			if (slp->ns_flag & SLP_VALID)
812 				nfsrv_zapsock(slp);
813 			TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
814 			kfree((caddr_t)slp, M_NFSSVC);
815 		}
816 		nfsrv_cleancache();	/* And clear out server cache */
817 	} else
818 		nfs_pub.np_valid = 0;
819 
820 	TAILQ_INIT(&nfssvc_sockhead);
821 	nfssvc_sockhead_flag &= ~SLP_INIT;
822 	if (nfssvc_sockhead_flag & SLP_WANTINIT) {
823 		nfssvc_sockhead_flag &= ~SLP_WANTINIT;
824 		wakeup((caddr_t)&nfssvc_sockhead);
825 	}
826 
827 	TAILQ_INIT(&nfsd_head);
828 	nfsd_head_flag &= ~NFSD_CHECKSLP;
829 
830 #if 0
831 	nfs_udpsock = (struct nfssvc_sock *)
832 	    kmalloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK | M_ZERO);
833 	mtx_init(&nfs_udpsock->ns_solock);
834 	STAILQ_INIT(&nfs_udpsock->ns_rec);
835 	TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
836 	TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
837 
838 	nfs_cltpsock = (struct nfssvc_sock *)
839 	    kmalloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK | M_ZERO);
840 	mtx_init(&nfs_cltpsock->ns_solock);
841 	STAILQ_INIT(&nfs_cltpsock->ns_rec);
842 	TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
843 	TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
844 #endif
845 }
846 
847 /*
848  * Add entries to the server monitor log.
849  */
850 static void
851 nfsd_rt(int sotype, struct nfsrv_descript *nd, int cacherep)
852 {
853 	struct drt *rt;
854 
855 	rt = &nfsdrt.drt[nfsdrt.pos];
856 	if (cacherep == RC_DOIT)
857 		rt->flag = 0;
858 	else if (cacherep == RC_REPLY)
859 		rt->flag = DRT_CACHEREPLY;
860 	else
861 		rt->flag = DRT_CACHEDROP;
862 	if (sotype == SOCK_STREAM)
863 		rt->flag |= DRT_TCP;
864 	if (nd->nd_flag & ND_NFSV3)
865 		rt->flag |= DRT_NFSV3;
866 	rt->proc = nd->nd_procnum;
867 	if (nd->nd_nam->sa_family == AF_INET)
868 	    rt->ipadr = ((struct sockaddr_in *)nd->nd_nam)->sin_addr.s_addr;
869 	else
870 	    rt->ipadr = INADDR_ANY;
871 	rt->resptime = nfs_curusec() - (nd->nd_starttime.tv_sec * 1000000 + nd->nd_starttime.tv_usec);
872 	getmicrotime(&rt->tstamp);
873 	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
874 }
875 #endif /* NFS_NOSERVER */
876 
877 static int nfs_defect = 0;
878 SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
879 
880 /*
881  * Get an authorization string for the uid by having the mount_nfs sitting
882  * on this mount point porpous out of the kernel and do it.
883  */
884 int
885 nfs_getauth(struct nfsmount *nmp, struct nfsreq *rep,
886 	    struct ucred *cred, char **auth_str, int *auth_len, char *verf_str,
887 	    int *verf_len, NFSKERBKEY_T key /* return session key */)
888 {
889 	int error = 0;
890 
891 	while ((nmp->nm_state & NFSSTA_WAITAUTH) == 0) {
892 		nmp->nm_state |= NFSSTA_WANTAUTH;
893 		(void) tsleep((caddr_t)&nmp->nm_authtype, 0,
894 			"nfsauth1", 2 * hz);
895 		error = nfs_sigintr(nmp, rep, rep->r_td);
896 		if (error) {
897 			nmp->nm_state &= ~NFSSTA_WANTAUTH;
898 			return (error);
899 		}
900 	}
901 	nmp->nm_state &= ~(NFSSTA_WAITAUTH | NFSSTA_WANTAUTH);
902 	nmp->nm_authstr = *auth_str = (char *)kmalloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
903 	nmp->nm_authlen = RPCAUTH_MAXSIZ;
904 	nmp->nm_verfstr = verf_str;
905 	nmp->nm_verflen = *verf_len;
906 	nmp->nm_authuid = cred->cr_uid;
907 	wakeup((caddr_t)&nmp->nm_authstr);
908 
909 	/*
910 	 * And wait for mount_nfs to do its stuff.
911 	 */
912 	while ((nmp->nm_state & NFSSTA_HASAUTH) == 0 && error == 0) {
913 		(void) tsleep((caddr_t)&nmp->nm_authlen, 0,
914 			"nfsauth2", 2 * hz);
915 		error = nfs_sigintr(nmp, rep, rep->r_td);
916 	}
917 	if (nmp->nm_state & NFSSTA_AUTHERR) {
918 		nmp->nm_state &= ~NFSSTA_AUTHERR;
919 		error = EAUTH;
920 	}
921 	if (error)
922 		kfree((caddr_t)*auth_str, M_TEMP);
923 	else {
924 		*auth_len = nmp->nm_authlen;
925 		*verf_len = nmp->nm_verflen;
926 		bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
927 	}
928 	nmp->nm_state &= ~NFSSTA_HASAUTH;
929 	nmp->nm_state |= NFSSTA_WAITAUTH;
930 	if (nmp->nm_state & NFSSTA_WANTAUTH) {
931 		nmp->nm_state &= ~NFSSTA_WANTAUTH;
932 		wakeup((caddr_t)&nmp->nm_authtype);
933 	}
934 	return (error);
935 }
936 
937 /*
938  * Get a nickname authenticator and verifier.
939  */
940 int
941 nfs_getnickauth(struct nfsmount *nmp, struct ucred *cred, char **auth_str,
942 		int *auth_len, char *verf_str, int verf_len)
943 {
944 	struct nfsuid *nuidp;
945 	u_int32_t *nickp, *verfp;
946 	struct timeval ktvin, ktvout;
947 
948 #ifdef DIAGNOSTIC
949 	if (verf_len < (4 * NFSX_UNSIGNED))
950 		panic("nfs_getnickauth verf too small");
951 #endif
952 	for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first;
953 	    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
954 		if (nuidp->nu_cr.cr_uid == cred->cr_uid)
955 			break;
956 	}
957 	if (!nuidp || nuidp->nu_expire < time_second)
958 		return (EACCES);
959 
960 	/*
961 	 * Move to the end of the lru list (end of lru == most recently used).
962 	 */
963 	TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
964 	TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
965 
966 	nickp = (u_int32_t *)kmalloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
967 	*nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
968 	*nickp = txdr_unsigned(nuidp->nu_nickname);
969 	*auth_str = (char *)nickp;
970 	*auth_len = 2 * NFSX_UNSIGNED;
971 
972 	/*
973 	 * Now we must encrypt the verifier and package it up.
974 	 */
975 	verfp = (u_int32_t *)verf_str;
976 	*verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
977 	if (time_second > nuidp->nu_timestamp.tv_sec ||
978 	    (time_second == nuidp->nu_timestamp.tv_sec &&
979 	     time_second > nuidp->nu_timestamp.tv_usec))
980 		getmicrotime(&nuidp->nu_timestamp);
981 	else
982 		nuidp->nu_timestamp.tv_usec++;
983 	ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
984 	ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
985 
986 	/*
987 	 * Now encrypt the timestamp verifier in ecb mode using the session
988 	 * key.
989 	 */
990 #ifdef NFSKERB
991 	XXX
992 #else
993 	ktvout.tv_sec = 0;
994 	ktvout.tv_usec = 0;
995 #endif
996 
997 	*verfp++ = ktvout.tv_sec;
998 	*verfp++ = ktvout.tv_usec;
999 	*verfp = 0;
1000 	return (0);
1001 }
1002 
1003 /*
1004  * Save the current nickname in a hash list entry on the mount point.
1005  */
1006 int
1007 nfs_savenickauth(struct nfsmount *nmp, struct ucred *cred, int len,
1008 		 NFSKERBKEY_T key, struct mbuf **mdp, char **dposp,
1009 		 struct mbuf *mrep)
1010 {
1011 	struct nfsuid *nuidp;
1012 	u_int32_t *tl;
1013 	struct timeval ktvin, ktvout;
1014 	u_int32_t nick;
1015 	int deltasec, error = 0;
1016 	struct nfsm_info info;
1017 
1018 	info.md = *mdp;
1019 	info.dpos = *dposp;
1020 	info.mrep = mrep;
1021 
1022 	if (len == (3 * NFSX_UNSIGNED)) {
1023 		NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
1024 		ktvin.tv_sec = *tl++;
1025 		ktvin.tv_usec = *tl++;
1026 		nick = fxdr_unsigned(u_int32_t, *tl);
1027 
1028 		/*
1029 		 * Decrypt the timestamp in ecb mode.
1030 		 */
1031 #ifdef NFSKERB
1032 		XXX
1033 #else
1034 		ktvout.tv_sec = 0;
1035 		ktvout.tv_usec = 0;
1036 #endif
1037 		ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
1038 		ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
1039 		deltasec = time_second - ktvout.tv_sec;
1040 		if (deltasec < 0)
1041 			deltasec = -deltasec;
1042 		/*
1043 		 * If ok, add it to the hash list for the mount point.
1044 		 */
1045 		if (deltasec <= NFS_KERBCLOCKSKEW) {
1046 			if (nmp->nm_numuids < nuidhash_max) {
1047 				nmp->nm_numuids++;
1048 				nuidp = (struct nfsuid *)
1049 				   kmalloc(sizeof (struct nfsuid), M_NFSUID,
1050 					M_WAITOK);
1051 			} else {
1052 				nuidp = TAILQ_FIRST(&nmp->nm_uidlruhead);
1053 				LIST_REMOVE(nuidp, nu_hash);
1054 				TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp,
1055 					nu_lru);
1056 			}
1057 			nuidp->nu_flag = 0;
1058 			nuidp->nu_cr.cr_uid = cred->cr_uid;
1059 			nuidp->nu_expire = time_second + NFS_KERBTTL;
1060 			nuidp->nu_timestamp = ktvout;
1061 			nuidp->nu_nickname = nick;
1062 			bcopy(key, nuidp->nu_key, sizeof (key));
1063 			TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp,
1064 				nu_lru);
1065 			LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid),
1066 				nuidp, nu_hash);
1067 		}
1068 	} else {
1069 		ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
1070 	}
1071 nfsmout:
1072 	*mdp = info.md;
1073 	*dposp = info.dpos;
1074 	return (error);
1075 }
1076