xref: /dragonfly/sys/vfs/nfs/nfs_syscalls.c (revision 1f8a7fec)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)nfs_syscalls.c	8.5 (Berkeley) 3/30/95
37  * $FreeBSD: src/sys/nfs/nfs_syscalls.c,v 1.58.2.1 2000/11/26 02:30:06 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_syscalls.c,v 1.31 2008/01/05 14:02:41 swildner Exp $
39  */
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/sysproto.h>
44 #include <sys/kernel.h>
45 #include <sys/sysctl.h>
46 #include <sys/file.h>
47 #include <sys/filedesc.h>
48 #include <sys/vnode.h>
49 #include <sys/malloc.h>
50 #include <sys/mount.h>
51 #include <sys/proc.h>
52 #include <sys/priv.h>
53 #include <sys/buf.h>
54 #include <sys/mbuf.h>
55 #include <sys/resourcevar.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/domain.h>
59 #include <sys/protosw.h>
60 #include <sys/nlookup.h>
61 #include <vm/vm_zone.h>
62 
63 #include <sys/mutex2.h>
64 #include <sys/thread2.h>
65 
66 #include <netinet/in.h>
67 #include <netinet/tcp.h>
68 #include "xdr_subs.h"
69 #include "rpcv2.h"
70 #include "nfsproto.h"
71 #include "nfs.h"
72 #include "nfsm_subs.h"
73 #include "nfsrvcache.h"
74 #include "nfsmount.h"
75 #include "nfsnode.h"
76 #include "nfsrtt.h"
77 
78 #include <sys/thread2.h>
79 
80 static MALLOC_DEFINE(M_NFSSVC, "NFS srvsock", "Nfs server structure");
81 
82 static int nuidhash_max = NFS_MAXUIDHASH;
83 
84 #ifndef NFS_NOSERVER
85 static void	nfsrv_zapsock (struct nfssvc_sock *slp);
86 #endif
87 
88 #define	TRUE	1
89 #define	FALSE	0
90 
91 SYSCTL_DECL(_vfs_nfs);
92 
93 #ifndef NFS_NOSERVER
94 int nfsd_waiting = 0;
95 static struct nfsdrt nfsdrt;
96 static int nfs_numnfsd = 0;
97 static void	nfsd_rt (int sotype, struct nfsrv_descript *nd,
98 			     int cacherep);
99 static int	nfssvc_addsock (struct file *, struct sockaddr *,
100 				    struct thread *);
101 static int	nfssvc_nfsd (struct nfsd_srvargs *,caddr_t,struct thread *);
102 
103 static int nfs_privport = 0;
104 SYSCTL_INT(_vfs_nfs, NFS_NFSPRIVPORT, nfs_privport, CTLFLAG_RW, &nfs_privport, 0, "");
105 SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay, CTLFLAG_RW, &nfsrvw_procrastinate, 0, "");
106 SYSCTL_INT(_vfs_nfs, OID_AUTO, gatherdelay_v3, CTLFLAG_RW, &nfsrvw_procrastinate_v3, 0, "");
107 int	nfs_soreserve = NFS_MAXPACKET * NFS_MAXASYNCBIO;
108 SYSCTL_INT(_vfs_nfs, OID_AUTO, soreserve, CTLFLAG_RW, &nfs_soreserve, 0, "");
109 
110 /*
111  * NFS server system calls
112  */
113 
114 #endif /* NFS_NOSERVER */
115 /*
116  * nfssvc_args(int flag, caddr_t argp)
117  *
118  * Nfs server psuedo system call for the nfsd's
119  * Based on the flag value it either:
120  * - adds a socket to the selection list
121  * - remains in the kernel as an nfsd
122  * - remains in the kernel as an nfsiod
123  *
124  * MPALMOSTSAFE
125  */
126 int
127 sys_nfssvc(struct nfssvc_args *uap)
128 {
129 #ifndef NFS_NOSERVER
130 	struct nlookupdata nd;
131 	struct file *fp;
132 	struct sockaddr *nam;
133 	struct nfsd_args nfsdarg;
134 	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
135 	struct nfsd_cargs ncd;
136 	struct nfsd *nfsd;
137 	struct nfssvc_sock *slp;
138 	struct nfsuid *nuidp;
139 	struct nfsmount *nmp;
140 	struct vnode *vp;
141 #endif /* NFS_NOSERVER */
142 	int error;
143 	struct thread *td = curthread;
144 
145 	/*
146 	 * Must be super user
147 	 */
148 	error = priv_check(td, PRIV_ROOT);
149 	if (error)
150 		return (error);
151 
152 	lwkt_gettoken(&nfs_token);
153 
154 	while (nfssvc_sockhead_flag & SLP_INIT) {
155 		nfssvc_sockhead_flag |= SLP_WANTINIT;
156 		tsleep((caddr_t)&nfssvc_sockhead, 0, "nfsd init", 0);
157 	}
158 	if (uap->flag & NFSSVC_BIOD)
159 		error = ENXIO;		/* no longer need nfsiod's */
160 #ifdef NFS_NOSERVER
161 	else
162 		error = ENXIO;
163 #else /* !NFS_NOSERVER */
164 	else if (uap->flag & NFSSVC_MNTD) {
165 		error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd));
166 		if (error)
167 			goto done;
168 		vp = NULL;
169 		error = nlookup_init(&nd, ncd.ncd_dirp, UIO_USERSPACE,
170 					NLC_FOLLOW);
171 		if (error == 0)
172 			error = nlookup(&nd);
173 		if (error == 0)
174 			error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
175 		nlookup_done(&nd);
176 		if (error)
177 			goto done;
178 
179 		if ((vp->v_flag & VROOT) == 0)
180 			error = EINVAL;
181 		nmp = VFSTONFS(vp->v_mount);
182 		vput(vp);
183 		if (error)
184 			goto done;
185 		if ((nmp->nm_state & NFSSTA_MNTD) &&
186 			(uap->flag & NFSSVC_GOTAUTH) == 0) {
187 			error = 0;
188 			goto done;
189 		}
190 		nmp->nm_state |= NFSSTA_MNTD;
191 		error = nfs_clientd(nmp, td->td_ucred, &ncd, uap->flag,
192 				    uap->argp, td);
193 	} else if (uap->flag & NFSSVC_ADDSOCK) {
194 		error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg));
195 		if (error)
196 			goto done;
197 		error = holdsock(td->td_proc->p_fd, nfsdarg.sock, &fp);
198 		if (error)
199 			goto done;
200 		/*
201 		 * Get the client address for connected sockets.
202 		 */
203 		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
204 			nam = NULL;
205 		else {
206 			error = getsockaddr(&nam, nfsdarg.name,
207 					    nfsdarg.namelen);
208 			if (error) {
209 				fdrop(fp);
210 				goto done;
211 			}
212 		}
213 		error = nfssvc_addsock(fp, nam, td);
214 		fdrop(fp);
215 	} else {
216 		error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd));
217 		if (error)
218 			goto done;
219 		if ((uap->flag & NFSSVC_AUTHIN) &&
220 		    ((nfsd = nsd->nsd_nfsd)) != NULL &&
221 		    (nfsd->nfsd_slp->ns_flag & SLP_VALID)) {
222 			slp = nfsd->nfsd_slp;
223 
224 			/*
225 			 * First check to see if another nfsd has already
226 			 * added this credential.
227 			 */
228 			for (nuidp = NUIDHASH(slp,nsd->nsd_cr.cr_uid)->lh_first;
229 			    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
230 				if (nuidp->nu_cr.cr_uid == nsd->nsd_cr.cr_uid &&
231 				    (!nfsd->nfsd_nd->nd_nam2 ||
232 				     netaddr_match(NU_NETFAM(nuidp),
233 				     &nuidp->nu_haddr, nfsd->nfsd_nd->nd_nam2)))
234 					break;
235 			}
236 			if (nuidp) {
237 			    nfsrv_setcred(&nuidp->nu_cr,&nfsd->nfsd_nd->nd_cr);
238 			    nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
239 			} else {
240 			    /*
241 			     * Nope, so we will.
242 			     */
243 			    if (slp->ns_numuids < nuidhash_max) {
244 				slp->ns_numuids++;
245 				nuidp = (struct nfsuid *)
246 				   kmalloc(sizeof (struct nfsuid), M_NFSUID,
247 					M_WAITOK);
248 			    } else
249 				nuidp = NULL;
250 			    if ((slp->ns_flag & SLP_VALID) == 0) {
251 				if (nuidp)
252 				    kfree((caddr_t)nuidp, M_NFSUID);
253 			    } else {
254 				if (nuidp == NULL) {
255 				    nuidp = TAILQ_FIRST(&slp->ns_uidlruhead);
256 				    LIST_REMOVE(nuidp, nu_hash);
257 				    TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp,
258 					nu_lru);
259 				    if (nuidp->nu_flag & NU_NAM)
260 					FREE(nuidp->nu_nam, M_SONAME);
261 			        }
262 				nuidp->nu_flag = 0;
263 				nuidp->nu_cr = nsd->nsd_cr;
264 				if (nuidp->nu_cr.cr_ngroups > NGROUPS)
265 				    nuidp->nu_cr.cr_ngroups = NGROUPS;
266 				nuidp->nu_cr.cr_ref = 1;
267 				nuidp->nu_timestamp = nsd->nsd_timestamp;
268 				nuidp->nu_expire = time_second + nsd->nsd_ttl;
269 				/*
270 				 * and save the session key in nu_key.
271 				 */
272 				bcopy(nsd->nsd_key, nuidp->nu_key,
273 				    sizeof (nsd->nsd_key));
274 				if (nfsd->nfsd_nd->nd_nam2) {
275 				    struct sockaddr_in *saddr;
276 
277 				    saddr = (struct sockaddr_in *)
278 					    nfsd->nfsd_nd->nd_nam2;
279 				    switch (saddr->sin_family) {
280 				    case AF_INET:
281 					nuidp->nu_flag |= NU_INETADDR;
282 					nuidp->nu_inetaddr =
283 					     saddr->sin_addr.s_addr;
284 					break;
285 				    case AF_ISO:
286 				    default:
287 					nuidp->nu_flag |= NU_NAM;
288 					nuidp->nu_nam =
289 					  dup_sockaddr(nfsd->nfsd_nd->nd_nam2);
290 					break;
291 				    };
292 				}
293 				TAILQ_INSERT_TAIL(&slp->ns_uidlruhead, nuidp,
294 					nu_lru);
295 				LIST_INSERT_HEAD(NUIDHASH(slp, nsd->nsd_uid),
296 					nuidp, nu_hash);
297 				nfsrv_setcred(&nuidp->nu_cr,
298 				    &nfsd->nfsd_nd->nd_cr);
299 				nfsd->nfsd_nd->nd_flag |= ND_KERBFULL;
300 			    }
301 			}
302 		}
303 		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
304 			nfsd->nfsd_flag |= NFSD_AUTHFAIL;
305 		error = nfssvc_nfsd(nsd, uap->argp, td);
306 	}
307 #endif /* NFS_NOSERVER */
308 	if (error == EINTR || error == ERESTART)
309 		error = 0;
310 done:
311 	lwkt_reltoken(&nfs_token);
312 	return (error);
313 }
314 
315 #ifndef NFS_NOSERVER
316 /*
317  * Adds a socket to the list for servicing by nfsds.
318  */
319 static int
320 nfssvc_addsock(struct file *fp, struct sockaddr *mynam, struct thread *td)
321 {
322 	int siz;
323 	struct nfssvc_sock *slp;
324 	struct socket *so;
325 	int error;
326 
327 	so = (struct socket *)fp->f_data;
328 #if 0
329 	tslp = NULL;
330 	/*
331 	 * Add it to the list, as required.
332 	 */
333 	if (so->so_proto->pr_protocol == IPPROTO_UDP) {
334 		tslp = nfs_udpsock;
335 		if (tslp->ns_flag & SLP_VALID) {
336 			if (mynam != NULL)
337 				FREE(mynam, M_SONAME);
338 			return (EPERM);
339 		}
340 	}
341 #endif
342 	/*
343 	 * Reserve buffer space in the socket.  Note that due to bugs in
344 	 * Linux's delayed-ack code, serious performance degredation may
345 	 * occur with linux hosts if the minimum is used.
346 	 *
347 	 * NFS sockets are not limited to the standard sb_max or by
348 	 * resource limits.
349 	 */
350 	if (so->so_type == SOCK_STREAM)
351 		siz = NFS_MAXPACKET + sizeof (u_long);
352 	else
353 		siz = NFS_MAXPACKET;
354 	if (siz < nfs_soreserve)
355 	    siz = nfs_soreserve;
356 
357 	error = soreserve(so, siz, siz, NULL);
358 	if (error) {
359 		if (mynam != NULL)
360 			FREE(mynam, M_SONAME);
361 		return (error);
362 	}
363 
364 	/*
365 	 * Set protocol specific options { for now TCP only } and
366 	 * reserve some space. For datagram sockets, this can get called
367 	 * repeatedly for the same socket, but that isn't harmful.
368 	 */
369 	if (so->so_type == SOCK_STREAM) {
370 		struct sockopt sopt;
371 		int val;
372 
373 		bzero(&sopt, sizeof sopt);
374 		sopt.sopt_level = SOL_SOCKET;
375 		sopt.sopt_name = SO_KEEPALIVE;
376 		sopt.sopt_val = &val;
377 		sopt.sopt_valsize = sizeof val;
378 		val = 1;
379 		sosetopt(so, &sopt);
380 	}
381 	if (so->so_proto->pr_domain->dom_family == AF_INET &&
382 	    so->so_proto->pr_protocol == IPPROTO_TCP) {
383 		struct sockopt sopt;
384 		int val;
385 
386 		bzero(&sopt, sizeof sopt);
387 		sopt.sopt_level = IPPROTO_TCP;
388 		sopt.sopt_name = TCP_NODELAY;
389 		sopt.sopt_val = &val;
390 		sopt.sopt_valsize = sizeof val;
391 		val = 1;
392 		sosetopt(so, &sopt);
393 	}
394 	atomic_clear_int(&so->so_rcv.ssb_flags, SSB_NOINTR);
395 	so->so_rcv.ssb_timeo = 0;
396 	atomic_clear_int(&so->so_snd.ssb_flags, SSB_NOINTR);
397 	so->so_snd.ssb_timeo = 0;
398 
399 	slp = (struct nfssvc_sock *)kmalloc(sizeof (struct nfssvc_sock),
400 	    M_NFSSVC, M_WAITOK | M_ZERO);
401 	mtx_init(&slp->ns_solock);
402 	STAILQ_INIT(&slp->ns_rec);
403 	TAILQ_INIT(&slp->ns_uidlruhead);
404 	lwkt_token_init(&slp->ns_token, 1, "nfssrv_token");
405 
406 	lwkt_gettoken(&nfs_token);
407 	TAILQ_INSERT_TAIL(&nfssvc_sockhead, slp, ns_chain);
408 	nfsrv_slpref(slp);
409 	lwkt_gettoken(&slp->ns_token);
410 
411 	slp->ns_so = so;
412 	slp->ns_nam = mynam;
413 	fp->f_count++;
414 	slp->ns_fp = fp;
415 
416 	so->so_upcallarg = (caddr_t)slp;
417 	so->so_upcall = nfsrv_rcv_upcall;
418 	atomic_set_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
419 	slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
420 	nfsrv_wakenfsd(slp, 1);
421 
422 	lwkt_reltoken(&slp->ns_token);
423 	lwkt_reltoken(&nfs_token);
424 
425 	return (0);
426 }
427 
428 /*
429  * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
430  * until it is killed by a signal.
431  */
432 static int
433 nfssvc_nfsd(struct nfsd_srvargs *nsd, caddr_t argp, struct thread *td)
434 {
435 	int siz;
436 	struct nfssvc_sock *slp;
437 	struct nfsd *nfsd = nsd->nsd_nfsd;
438 	struct nfsrv_descript *nd = NULL;
439 	struct mbuf *m, *mreq;
440 	int error = 0, cacherep, sotype, writes_todo;
441 	int procrastinate;
442 	u_quad_t cur_usec;
443 
444 #ifndef nolint
445 	cacherep = RC_DOIT;
446 	writes_todo = 0;
447 #endif
448 	lwkt_gettoken(&nfs_token);
449 
450 	if (nfsd == NULL) {
451 		nsd->nsd_nfsd = nfsd = (struct nfsd *)
452 			kmalloc(sizeof (struct nfsd), M_NFSD, M_WAITOK|M_ZERO);
453 		nfsd->nfsd_td = td;
454 		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
455 		nfs_numnfsd++;
456 	}
457 
458 	/*
459 	 * Loop getting rpc requests until SIGKILL.
460 	 */
461 	for (;;) {
462 		if ((nfsd->nfsd_flag & NFSD_REQINPROG) == 0) {
463 			while (nfsd->nfsd_slp == NULL &&
464 			    (nfsd_head_flag & NFSD_CHECKSLP) == 0) {
465 				nfsd->nfsd_flag |= NFSD_WAITING;
466 				nfsd_waiting++;
467 				error = tsleep((caddr_t)nfsd, PCATCH, "nfsd", 0);
468 				nfsd_waiting--;
469 				if (error)
470 					goto done;
471 			}
472 			if (nfsd->nfsd_slp == NULL &&
473 			    (nfsd_head_flag & NFSD_CHECKSLP) != 0) {
474 				TAILQ_FOREACH(slp, &nfssvc_sockhead, ns_chain) {
475 				    if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
476 					== (SLP_VALID | SLP_DOREC)) {
477 					    slp->ns_flag &= ~SLP_DOREC;
478 					    nfsrv_slpref(slp);
479 					    nfsd->nfsd_slp = slp;
480 					    break;
481 				    }
482 				}
483 				if (slp == 0)
484 					nfsd_head_flag &= ~NFSD_CHECKSLP;
485 			}
486 			if ((slp = nfsd->nfsd_slp) == NULL)
487 				continue;
488 
489 			lwkt_reltoken(&nfs_token);
490 			lwkt_gettoken(&slp->ns_token);
491 
492 			if (slp->ns_flag & SLP_VALID) {
493 				if (slp->ns_flag & SLP_DISCONN)
494 					nfsrv_zapsock(slp);
495 				else if (slp->ns_flag & SLP_NEEDQ) {
496 					slp->ns_flag &= ~SLP_NEEDQ;
497 					(void) nfs_slplock(slp, 1);
498 					nfsrv_rcv(slp->ns_so, (caddr_t)slp,
499 						MB_WAIT);
500 					nfs_slpunlock(slp);
501 				}
502 				error = nfsrv_dorec(slp, nfsd, &nd);
503 				cur_usec = nfs_curusec();
504 				if (error && slp->ns_tq.lh_first &&
505 				    slp->ns_tq.lh_first->nd_time <= cur_usec) {
506 					error = 0;
507 					cacherep = RC_DOIT;
508 					writes_todo = 1;
509 				} else
510 					writes_todo = 0;
511 				nfsd->nfsd_flag |= NFSD_REQINPROG;
512 			}
513 		} else {
514 			error = 0;
515 			slp = nfsd->nfsd_slp;
516 			lwkt_reltoken(&nfs_token);
517 			lwkt_gettoken(&slp->ns_token);
518 		}
519 
520 		/*
521 		 * nfs_token not held here.  slp token is held.
522 		 */
523 		if (error || (slp->ns_flag & SLP_VALID) == 0) {
524 			if (nd) {
525 				kfree((caddr_t)nd, M_NFSRVDESC);
526 				nd = NULL;
527 			}
528 			nfsd->nfsd_slp = NULL;
529 			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
530 			lwkt_reltoken(&slp->ns_token);
531 			lwkt_gettoken(&nfs_token);
532 			nfsrv_slpderef(slp);
533 			continue;
534 		}
535 
536 		/*
537 		 * nfs_token not held here.  slp token is held.
538 		 */
539 		sotype = slp->ns_so->so_type;
540 		if (nd) {
541 		    getmicrotime(&nd->nd_starttime);
542 		    if (nd->nd_nam2)
543 			nd->nd_nam = nd->nd_nam2;
544 		    else
545 			nd->nd_nam = slp->ns_nam;
546 
547 		    /*
548 		     * Check to see if authorization is needed.
549 		     */
550 		    if (nfsd->nfsd_flag & NFSD_NEEDAUTH) {
551 			nfsd->nfsd_flag &= ~NFSD_NEEDAUTH;
552 			nsd->nsd_haddr =
553 				((struct sockaddr_in *)
554 				 nd->nd_nam)->sin_addr.s_addr;
555 			nsd->nsd_authlen = nfsd->nfsd_authlen;
556 			nsd->nsd_verflen = nfsd->nfsd_verflen;
557 			if (!copyout(nfsd->nfsd_authstr,nsd->nsd_authstr,
558 				nfsd->nfsd_authlen) &&
559 			    !copyout(nfsd->nfsd_verfstr, nsd->nsd_verfstr,
560 				nfsd->nfsd_verflen) &&
561 			    !copyout((caddr_t)nsd, argp, sizeof (*nsd)))
562 			    lwkt_reltoken(&slp->ns_token);
563 			    return (ENEEDAUTH);
564 			cacherep = RC_DROPIT;
565 		    } else {
566 			cacherep = nfsrv_getcache(nd, slp, &mreq);
567 		    }
568 
569 		    if (nfsd->nfsd_flag & NFSD_AUTHFAIL) {
570 			nfsd->nfsd_flag &= ~NFSD_AUTHFAIL;
571 			nd->nd_procnum = NFSPROC_NOOP;
572 			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
573 			cacherep = RC_DOIT;
574 		    } else if (nfs_privport) {
575 			/* Check if source port is privileged */
576 			u_short port;
577 			struct sockaddr *nam = nd->nd_nam;
578 			struct sockaddr_in *sin;
579 
580 			sin = (struct sockaddr_in *)nam;
581 			port = ntohs(sin->sin_port);
582 			if (port >= IPPORT_RESERVED &&
583 			    nd->nd_procnum != NFSPROC_NULL) {
584 			    nd->nd_procnum = NFSPROC_NOOP;
585 			    nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
586 			    cacherep = RC_DOIT;
587 			    kprintf("NFS request from unprivileged port (%s:%d)\n",
588 				   inet_ntoa(sin->sin_addr), port);
589 			}
590 		    }
591 
592 		}
593 
594 		/*
595 		 * Loop to get all the write rpc replies that have been
596 		 * gathered together.
597 		 *
598 		 * nfs_token not held here.  slp token is held.
599 		 */
600 		do {
601 		    switch (cacherep) {
602 		    case RC_DOIT:
603 			if (nd && (nd->nd_flag & ND_NFSV3))
604 			    procrastinate = nfsrvw_procrastinate_v3;
605 			else
606 			    procrastinate = nfsrvw_procrastinate;
607 			if (writes_todo || (nd->nd_procnum == NFSPROC_WRITE &&
608 			    procrastinate > 0)
609 			) {
610 			    error = nfsrv_writegather(&nd, slp,
611 				nfsd->nfsd_td, &mreq);
612 			} else {
613 			    error = (*(nfsrv3_procs[nd->nd_procnum]))(nd,
614 				slp, nfsd->nfsd_td, &mreq);
615 			}
616 			if (mreq == NULL)
617 				break;
618 			if (error != 0 && error != NFSERR_RETVOID) {
619 				if (nd->nd_procnum != NQNFSPROC_VACATED)
620 					nfsstats.srv_errs++;
621 				nfsrv_updatecache(nd, FALSE, mreq);
622 				if (nd->nd_nam2)
623 					FREE(nd->nd_nam2, M_SONAME);
624 				break;
625 			}
626 			nfsstats.srvrpccnt[nd->nd_procnum]++;
627 			nfsrv_updatecache(nd, TRUE, mreq);
628 			nd->nd_mrep = NULL;
629 		    case RC_REPLY:
630 			m = mreq;
631 			siz = 0;
632 			while (m) {
633 				siz += m->m_len;
634 				m = m->m_next;
635 			}
636 			if (siz <= 0 || siz > NFS_MAXPACKET) {
637 				kprintf("mbuf siz=%d\n",siz);
638 				panic("Bad nfs svc reply");
639 			}
640 			m = mreq;
641 			m->m_pkthdr.len = siz;
642 			m->m_pkthdr.rcvif = NULL;
643 			/*
644 			 * For stream protocols, prepend a Sun RPC
645 			 * Record Mark.
646 			 */
647 			if (sotype == SOCK_STREAM) {
648 				M_PREPEND(m, NFSX_UNSIGNED, MB_WAIT);
649 				if (m == NULL)
650 					return (ENOBUFS);
651 				*mtod(m, u_int32_t *) = htonl(0x80000000 | siz);
652 			}
653 			if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
654 				(void) nfs_slplock(slp, 1);
655 			if (slp->ns_flag & SLP_VALID)
656 			    error = nfs_send(slp->ns_so, nd->nd_nam2, m, NULL);
657 			else {
658 			    error = EPIPE;
659 			    m_freem(m);
660 			}
661 			if (nfsrtton)
662 				nfsd_rt(sotype, nd, cacherep);
663 			if (nd->nd_nam2)
664 				FREE(nd->nd_nam2, M_SONAME);
665 			if (nd->nd_mrep)
666 				m_freem(nd->nd_mrep);
667 			if (error == EPIPE)
668 				nfsrv_zapsock(slp);
669 			if (slp->ns_so->so_proto->pr_flags & PR_CONNREQUIRED)
670 				nfs_slpunlock(slp);
671 			if (error == EINTR || error == ERESTART) {
672 				kfree((caddr_t)nd, M_NFSRVDESC);
673 				lwkt_reltoken(&slp->ns_token);
674 				lwkt_gettoken(&nfs_token);
675 				nfsrv_slpderef(slp);
676 				goto done;
677 			}
678 			break;
679 		    case RC_DROPIT:
680 			if (nfsrtton)
681 				nfsd_rt(sotype, nd, cacherep);
682 			m_freem(nd->nd_mrep);
683 			if (nd->nd_nam2)
684 				FREE(nd->nd_nam2, M_SONAME);
685 			break;
686 		    };
687 		    if (nd) {
688 			FREE((caddr_t)nd, M_NFSRVDESC);
689 			nd = NULL;
690 		    }
691 
692 		    /*
693 		     * Check to see if there are outstanding writes that
694 		     * need to be serviced.
695 		     */
696 		    cur_usec = nfs_curusec();
697 		    if (slp->ns_tq.lh_first &&
698 			slp->ns_tq.lh_first->nd_time <= cur_usec) {
699 			cacherep = RC_DOIT;
700 			writes_todo = 1;
701 		    } else {
702 			writes_todo = 0;
703 		    }
704 		} while (writes_todo);
705 
706 		/*
707 		 * nfs_token not held here.  slp token is held.
708 		 */
709 		if (nfsrv_dorec(slp, nfsd, &nd)) {
710 			nfsd->nfsd_flag &= ~NFSD_REQINPROG;
711 			nfsd->nfsd_slp = NULL;
712 			lwkt_reltoken(&slp->ns_token);
713 			lwkt_gettoken(&nfs_token);
714 			nfsrv_slpderef(slp);
715 		} else {
716 			lwkt_reltoken(&slp->ns_token);
717 			lwkt_gettoken(&nfs_token);
718 		}
719 	}
720 done:
721 	TAILQ_REMOVE(&nfsd_head, nfsd, nfsd_chain);
722 	kfree((caddr_t)nfsd, M_NFSD);
723 	nsd->nsd_nfsd = NULL;
724 	if (--nfs_numnfsd == 0)
725 		nfsrv_init(TRUE);	/* Reinitialize everything */
726 
727 	lwkt_reltoken(&nfs_token);
728 	return (error);
729 }
730 
731 /*
732  * Shut down a socket associated with an nfssvc_sock structure.
733  * Should be called with the send lock set, if required.
734  * The trick here is to increment the sref at the start, so that the nfsds
735  * will stop using it and clear ns_flag at the end so that it will not be
736  * reassigned during cleanup.
737  */
738 static void
739 nfsrv_zapsock(struct nfssvc_sock *slp)
740 {
741 	struct nfsuid *nuidp, *nnuidp;
742 	struct nfsrv_descript *nwp, *nnwp;
743 	struct socket *so;
744 	struct file *fp;
745 	struct nfsrv_rec *rec;
746 
747 	slp->ns_flag &= ~SLP_ALLFLAGS;
748 	fp = slp->ns_fp;
749 	if (fp) {
750 		slp->ns_fp = NULL;
751 		so = slp->ns_so;
752 		atomic_clear_int(&so->so_rcv.ssb_flags, SSB_UPCALL);
753 		so->so_upcall = NULL;
754 		so->so_upcallarg = NULL;
755 		soshutdown(so, SHUT_RDWR);
756 		closef(fp, NULL);
757 		if (slp->ns_nam)
758 			FREE(slp->ns_nam, M_SONAME);
759 		m_freem(slp->ns_raw);
760 		while ((rec = STAILQ_FIRST(&slp->ns_rec)) != NULL) {
761 			--slp->ns_numrec;
762 			STAILQ_REMOVE_HEAD(&slp->ns_rec, nr_link);
763 			if (rec->nr_address)
764 				FREE(rec->nr_address, M_SONAME);
765 			m_freem(rec->nr_packet);
766 			kfree(rec, M_NFSRVDESC);
767 		}
768 		TAILQ_FOREACH_MUTABLE(nuidp, &slp->ns_uidlruhead, nu_lru,
769 				      nnuidp) {
770 			LIST_REMOVE(nuidp, nu_hash);
771 			TAILQ_REMOVE(&slp->ns_uidlruhead, nuidp, nu_lru);
772 			if (nuidp->nu_flag & NU_NAM)
773 				FREE(nuidp->nu_nam, M_SONAME);
774 			kfree((caddr_t)nuidp, M_NFSUID);
775 		}
776 		crit_enter();
777 		for (nwp = slp->ns_tq.lh_first; nwp; nwp = nnwp) {
778 			nnwp = nwp->nd_tq.le_next;
779 			LIST_REMOVE(nwp, nd_tq);
780 			kfree((caddr_t)nwp, M_NFSRVDESC);
781 		}
782 		LIST_INIT(&slp->ns_tq);
783 		crit_exit();
784 	}
785 }
786 
787 /*
788  * Derefence a server socket structure. If it has no more references and
789  * is no longer valid, you can throw it away.
790  *
791  * Must be holding nfs_token!
792  */
793 void
794 nfsrv_slpderef(struct nfssvc_sock *slp)
795 {
796 	ASSERT_LWKT_TOKEN_HELD(&nfs_token);
797 	if (--slp->ns_sref == 0 && (slp->ns_flag & SLP_VALID) == 0) {
798 		TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
799 		kfree((caddr_t)slp, M_NFSSVC);
800 	}
801 }
802 
803 void
804 nfsrv_slpref(struct nfssvc_sock *slp)
805 {
806 	ASSERT_LWKT_TOKEN_HELD(&nfs_token);
807 	++slp->ns_sref;
808 }
809 
810 /*
811  * Lock a socket against others.
812  *
813  * Returns 0 on failure, 1 on success.
814  */
815 int
816 nfs_slplock(struct nfssvc_sock *slp, int wait)
817 {
818 	mtx_t mtx = &slp->ns_solock;
819 
820 	if (wait) {
821 		mtx_lock_ex(mtx, "nfsslplck", 0, 0);
822 		return(1);
823 	} else if (mtx_lock_ex_try(mtx) == 0) {
824 		return(1);
825 	} else {
826 		return(0);
827 	}
828 }
829 
830 /*
831  * Unlock the stream socket for others.
832  */
833 void
834 nfs_slpunlock(struct nfssvc_sock *slp)
835 {
836 	mtx_t mtx = &slp->ns_solock;
837 
838 	mtx_unlock(mtx);
839 }
840 
841 /*
842  * Initialize the data structures for the server.
843  * Handshake with any new nfsds starting up to avoid any chance of
844  * corruption.
845  */
846 void
847 nfsrv_init(int terminating)
848 {
849 	struct nfssvc_sock *slp, *nslp;
850 
851 	lwkt_gettoken(&nfs_token);
852 	if (nfssvc_sockhead_flag & SLP_INIT)
853 		panic("nfsd init");
854 	nfssvc_sockhead_flag |= SLP_INIT;
855 
856 	if (terminating) {
857 		TAILQ_FOREACH_MUTABLE(slp, &nfssvc_sockhead, ns_chain, nslp) {
858 			if (slp->ns_flag & SLP_VALID)
859 				nfsrv_zapsock(slp);
860 			TAILQ_REMOVE(&nfssvc_sockhead, slp, ns_chain);
861 			kfree((caddr_t)slp, M_NFSSVC);
862 		}
863 		nfsrv_cleancache();	/* And clear out server cache */
864 	} else {
865 		nfs_pub.np_valid = 0;
866 	}
867 
868 	TAILQ_INIT(&nfssvc_sockhead);
869 	nfssvc_sockhead_flag &= ~SLP_INIT;
870 	if (nfssvc_sockhead_flag & SLP_WANTINIT) {
871 		nfssvc_sockhead_flag &= ~SLP_WANTINIT;
872 		wakeup((caddr_t)&nfssvc_sockhead);
873 	}
874 
875 	TAILQ_INIT(&nfsd_head);
876 	nfsd_head_flag &= ~NFSD_CHECKSLP;
877 
878 	lwkt_reltoken(&nfs_token);
879 
880 #if 0
881 	nfs_udpsock = (struct nfssvc_sock *)
882 	    kmalloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK | M_ZERO);
883 	mtx_init(&nfs_udpsock->ns_solock);
884 	STAILQ_INIT(&nfs_udpsock->ns_rec);
885 	TAILQ_INIT(&nfs_udpsock->ns_uidlruhead);
886 	TAILQ_INSERT_HEAD(&nfssvc_sockhead, nfs_udpsock, ns_chain);
887 
888 	nfs_cltpsock = (struct nfssvc_sock *)
889 	    kmalloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK | M_ZERO);
890 	mtx_init(&nfs_cltpsock->ns_solock);
891 	STAILQ_INIT(&nfs_cltpsock->ns_rec);
892 	TAILQ_INIT(&nfs_cltpsock->ns_uidlruhead);
893 	TAILQ_INSERT_TAIL(&nfssvc_sockhead, nfs_cltpsock, ns_chain);
894 #endif
895 }
896 
897 /*
898  * Add entries to the server monitor log.
899  */
900 static void
901 nfsd_rt(int sotype, struct nfsrv_descript *nd, int cacherep)
902 {
903 	struct drt *rt;
904 
905 	rt = &nfsdrt.drt[nfsdrt.pos];
906 	if (cacherep == RC_DOIT)
907 		rt->flag = 0;
908 	else if (cacherep == RC_REPLY)
909 		rt->flag = DRT_CACHEREPLY;
910 	else
911 		rt->flag = DRT_CACHEDROP;
912 	if (sotype == SOCK_STREAM)
913 		rt->flag |= DRT_TCP;
914 	if (nd->nd_flag & ND_NFSV3)
915 		rt->flag |= DRT_NFSV3;
916 	rt->proc = nd->nd_procnum;
917 	if (nd->nd_nam->sa_family == AF_INET)
918 	    rt->ipadr = ((struct sockaddr_in *)nd->nd_nam)->sin_addr.s_addr;
919 	else
920 	    rt->ipadr = INADDR_ANY;
921 	rt->resptime = nfs_curusec() - (nd->nd_starttime.tv_sec * 1000000 + nd->nd_starttime.tv_usec);
922 	getmicrotime(&rt->tstamp);
923 	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
924 }
925 #endif /* NFS_NOSERVER */
926 
927 static int nfs_defect = 0;
928 SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
929 
930 /*
931  * Get an authorization string for the uid by having the mount_nfs sitting
932  * on this mount point porpous out of the kernel and do it.
933  */
934 int
935 nfs_getauth(struct nfsmount *nmp, struct nfsreq *rep,
936 	    struct ucred *cred, char **auth_str, int *auth_len, char *verf_str,
937 	    int *verf_len, NFSKERBKEY_T key /* return session key */)
938 {
939 	int error = 0;
940 
941 	while ((nmp->nm_state & NFSSTA_WAITAUTH) == 0) {
942 		nmp->nm_state |= NFSSTA_WANTAUTH;
943 		(void) tsleep((caddr_t)&nmp->nm_authtype, 0,
944 			"nfsauth1", 2 * hz);
945 		error = nfs_sigintr(nmp, rep, rep->r_td);
946 		if (error) {
947 			nmp->nm_state &= ~NFSSTA_WANTAUTH;
948 			return (error);
949 		}
950 	}
951 	nmp->nm_state &= ~(NFSSTA_WAITAUTH | NFSSTA_WANTAUTH);
952 	nmp->nm_authstr = *auth_str = (char *)kmalloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
953 	nmp->nm_authlen = RPCAUTH_MAXSIZ;
954 	nmp->nm_verfstr = verf_str;
955 	nmp->nm_verflen = *verf_len;
956 	nmp->nm_authuid = cred->cr_uid;
957 	wakeup((caddr_t)&nmp->nm_authstr);
958 
959 	/*
960 	 * And wait for mount_nfs to do its stuff.
961 	 */
962 	while ((nmp->nm_state & NFSSTA_HASAUTH) == 0 && error == 0) {
963 		(void) tsleep((caddr_t)&nmp->nm_authlen, 0,
964 			"nfsauth2", 2 * hz);
965 		error = nfs_sigintr(nmp, rep, rep->r_td);
966 	}
967 	if (nmp->nm_state & NFSSTA_AUTHERR) {
968 		nmp->nm_state &= ~NFSSTA_AUTHERR;
969 		error = EAUTH;
970 	}
971 	if (error)
972 		kfree((caddr_t)*auth_str, M_TEMP);
973 	else {
974 		*auth_len = nmp->nm_authlen;
975 		*verf_len = nmp->nm_verflen;
976 		bcopy((caddr_t)nmp->nm_key, (caddr_t)key, sizeof (key));
977 	}
978 	nmp->nm_state &= ~NFSSTA_HASAUTH;
979 	nmp->nm_state |= NFSSTA_WAITAUTH;
980 	if (nmp->nm_state & NFSSTA_WANTAUTH) {
981 		nmp->nm_state &= ~NFSSTA_WANTAUTH;
982 		wakeup((caddr_t)&nmp->nm_authtype);
983 	}
984 	return (error);
985 }
986 
987 /*
988  * Get a nickname authenticator and verifier.
989  */
990 int
991 nfs_getnickauth(struct nfsmount *nmp, struct ucred *cred, char **auth_str,
992 		int *auth_len, char *verf_str, int verf_len)
993 {
994 	struct nfsuid *nuidp;
995 	u_int32_t *nickp, *verfp;
996 	struct timeval ktvin, ktvout;
997 
998 #ifdef DIAGNOSTIC
999 	if (verf_len < (4 * NFSX_UNSIGNED))
1000 		panic("nfs_getnickauth verf too small");
1001 #endif
1002 	for (nuidp = NMUIDHASH(nmp, cred->cr_uid)->lh_first;
1003 	    nuidp != 0; nuidp = nuidp->nu_hash.le_next) {
1004 		if (nuidp->nu_cr.cr_uid == cred->cr_uid)
1005 			break;
1006 	}
1007 	if (!nuidp || nuidp->nu_expire < time_second)
1008 		return (EACCES);
1009 
1010 	/*
1011 	 * Move to the end of the lru list (end of lru == most recently used).
1012 	 */
1013 	TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru);
1014 	TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp, nu_lru);
1015 
1016 	nickp = (u_int32_t *)kmalloc(2 * NFSX_UNSIGNED, M_TEMP, M_WAITOK);
1017 	*nickp++ = txdr_unsigned(RPCAKN_NICKNAME);
1018 	*nickp = txdr_unsigned(nuidp->nu_nickname);
1019 	*auth_str = (char *)nickp;
1020 	*auth_len = 2 * NFSX_UNSIGNED;
1021 
1022 	/*
1023 	 * Now we must encrypt the verifier and package it up.
1024 	 */
1025 	verfp = (u_int32_t *)verf_str;
1026 	*verfp++ = txdr_unsigned(RPCAKN_NICKNAME);
1027 	if (time_second > nuidp->nu_timestamp.tv_sec ||
1028 	    (time_second == nuidp->nu_timestamp.tv_sec &&
1029 	     time_second > nuidp->nu_timestamp.tv_usec))
1030 		getmicrotime(&nuidp->nu_timestamp);
1031 	else
1032 		nuidp->nu_timestamp.tv_usec++;
1033 	ktvin.tv_sec = txdr_unsigned(nuidp->nu_timestamp.tv_sec);
1034 	ktvin.tv_usec = txdr_unsigned(nuidp->nu_timestamp.tv_usec);
1035 
1036 	/*
1037 	 * Now encrypt the timestamp verifier in ecb mode using the session
1038 	 * key.
1039 	 */
1040 #ifdef NFSKERB
1041 	XXX
1042 #else
1043 	ktvout.tv_sec = 0;
1044 	ktvout.tv_usec = 0;
1045 #endif
1046 
1047 	*verfp++ = ktvout.tv_sec;
1048 	*verfp++ = ktvout.tv_usec;
1049 	*verfp = 0;
1050 	return (0);
1051 }
1052 
1053 /*
1054  * Save the current nickname in a hash list entry on the mount point.
1055  */
1056 int
1057 nfs_savenickauth(struct nfsmount *nmp, struct ucred *cred, int len,
1058 		 NFSKERBKEY_T key, struct mbuf **mdp, char **dposp,
1059 		 struct mbuf *mrep)
1060 {
1061 	struct nfsuid *nuidp;
1062 	u_int32_t *tl;
1063 	struct timeval ktvin, ktvout;
1064 	u_int32_t nick;
1065 	int deltasec, error = 0;
1066 	struct nfsm_info info;
1067 
1068 	info.md = *mdp;
1069 	info.dpos = *dposp;
1070 	info.mrep = mrep;
1071 
1072 	if (len == (3 * NFSX_UNSIGNED)) {
1073 		NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
1074 		ktvin.tv_sec = *tl++;
1075 		ktvin.tv_usec = *tl++;
1076 		nick = fxdr_unsigned(u_int32_t, *tl);
1077 
1078 		/*
1079 		 * Decrypt the timestamp in ecb mode.
1080 		 */
1081 #ifdef NFSKERB
1082 		XXX
1083 #else
1084 		ktvout.tv_sec = 0;
1085 		ktvout.tv_usec = 0;
1086 #endif
1087 		ktvout.tv_sec = fxdr_unsigned(long, ktvout.tv_sec);
1088 		ktvout.tv_usec = fxdr_unsigned(long, ktvout.tv_usec);
1089 		deltasec = time_second - ktvout.tv_sec;
1090 		if (deltasec < 0)
1091 			deltasec = -deltasec;
1092 		/*
1093 		 * If ok, add it to the hash list for the mount point.
1094 		 */
1095 		if (deltasec <= NFS_KERBCLOCKSKEW) {
1096 			if (nmp->nm_numuids < nuidhash_max) {
1097 				nmp->nm_numuids++;
1098 				nuidp = (struct nfsuid *)
1099 				   kmalloc(sizeof (struct nfsuid), M_NFSUID,
1100 					M_WAITOK);
1101 			} else {
1102 				nuidp = TAILQ_FIRST(&nmp->nm_uidlruhead);
1103 				LIST_REMOVE(nuidp, nu_hash);
1104 				TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp,
1105 					nu_lru);
1106 			}
1107 			nuidp->nu_flag = 0;
1108 			nuidp->nu_cr.cr_uid = cred->cr_uid;
1109 			nuidp->nu_expire = time_second + NFS_KERBTTL;
1110 			nuidp->nu_timestamp = ktvout;
1111 			nuidp->nu_nickname = nick;
1112 			bcopy(key, nuidp->nu_key, sizeof (key));
1113 			TAILQ_INSERT_TAIL(&nmp->nm_uidlruhead, nuidp,
1114 				nu_lru);
1115 			LIST_INSERT_HEAD(NMUIDHASH(nmp, cred->cr_uid),
1116 				nuidp, nu_hash);
1117 		}
1118 	} else {
1119 		ERROROUT(nfsm_adv(&info, nfsm_rndup(len)));
1120 	}
1121 nfsmout:
1122 	*mdp = info.md;
1123 	*dposp = info.dpos;
1124 	return (error);
1125 }
1126