xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 8a0a413e)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 
41 #include "opt_bootp.h"
42 #include "opt_nfsroot.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/bio.h>
48 #include <sys/buf.h>
49 #include <sys/clock.h>
50 #include <sys/jail.h>
51 #include <sys/limits.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 #include <sys/module.h>
56 #include <sys/mount.h>
57 #include <sys/proc.h>
58 #include <sys/socket.h>
59 #include <sys/socketvar.h>
60 #include <sys/sockio.h>
61 #include <sys/sysctl.h>
62 #include <sys/vnode.h>
63 #include <sys/signalvar.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_extern.h>
67 #include <vm/uma.h>
68 
69 #include <net/if.h>
70 #include <net/route.h>
71 #include <netinet/in.h>
72 
73 #include <fs/nfs/nfsport.h>
74 #include <fs/nfsclient/nfsnode.h>
75 #include <fs/nfsclient/nfsmount.h>
76 #include <fs/nfsclient/nfs.h>
77 #include <nfs/nfsdiskless.h>
78 
79 FEATURE(nfscl, "NFSv4 client");
80 
81 extern int nfscl_ticks;
82 extern struct timeval nfsboottime;
83 extern int nfsrv_useacl;
84 extern int nfscl_debuglevel;
85 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
86 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
87 extern struct mtx ncl_iod_mutex;
88 NFSCLSTATEMUTEX;
89 
90 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
91 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
92 
93 SYSCTL_DECL(_vfs_nfs);
94 static int nfs_ip_paranoia = 1;
95 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
96     &nfs_ip_paranoia, 0, "");
97 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
98 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
99         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
100 /* how long between console messages "nfs server foo not responding" */
101 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
102 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
103         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
104 #ifdef NFS_DEBUG
105 int nfs_debug;
106 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
107     "Toggle debug flag");
108 #endif
109 
110 static int	nfs_mountroot(struct mount *);
111 static void	nfs_sec_name(char *, int *);
112 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
113 		    struct nfs_args *argp, const char *, struct ucred *,
114 		    struct thread *);
115 static int	mountnfs(struct nfs_args *, struct mount *,
116 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
117 		    u_char *, int, struct vnode **, struct ucred *,
118 		    struct thread *, int, int, int);
119 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
120 		    struct sockaddr_storage *, int *, off_t *,
121 		    struct timeval *);
122 static vfs_mount_t nfs_mount;
123 static vfs_cmount_t nfs_cmount;
124 static vfs_unmount_t nfs_unmount;
125 static vfs_root_t nfs_root;
126 static vfs_statfs_t nfs_statfs;
127 static vfs_sync_t nfs_sync;
128 static vfs_sysctl_t nfs_sysctl;
129 static vfs_purge_t nfs_purge;
130 
131 /*
132  * nfs vfs operations.
133  */
134 static struct vfsops nfs_vfsops = {
135 	.vfs_init =		ncl_init,
136 	.vfs_mount =		nfs_mount,
137 	.vfs_cmount =		nfs_cmount,
138 	.vfs_root =		nfs_root,
139 	.vfs_statfs =		nfs_statfs,
140 	.vfs_sync =		nfs_sync,
141 	.vfs_uninit =		ncl_uninit,
142 	.vfs_unmount =		nfs_unmount,
143 	.vfs_sysctl =		nfs_sysctl,
144 	.vfs_purge =		nfs_purge,
145 };
146 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
147 
148 /* So that loader and kldload(2) can find us, wherever we are.. */
149 MODULE_VERSION(nfs, 1);
150 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
151 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
152 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
153 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
154 
155 /*
156  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
157  * can be shared by both NFS clients. It is declared here so that it
158  * will be defined for kernels built without NFS_ROOT, although it
159  * isn't used in that case.
160  */
161 #if !defined(NFS_ROOT)
162 struct nfs_diskless	nfs_diskless = { { { 0 } } };
163 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
164 int			nfs_diskless_valid = 0;
165 #endif
166 
167 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
168     &nfs_diskless_valid, 0,
169     "Has the diskless struct been filled correctly");
170 
171 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
172     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
173 
174 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
175     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
176     "%Ssockaddr_in", "Diskless root nfs address");
177 
178 
179 void		newnfsargs_ntoh(struct nfs_args *);
180 static int	nfs_mountdiskless(char *,
181 		    struct sockaddr_in *, struct nfs_args *,
182 		    struct thread *, struct vnode **, struct mount *);
183 static void	nfs_convert_diskless(void);
184 static void	nfs_convert_oargs(struct nfs_args *args,
185 		    struct onfs_args *oargs);
186 
187 int
188 newnfs_iosize(struct nfsmount *nmp)
189 {
190 	int iosize, maxio;
191 
192 	/* First, set the upper limit for iosize */
193 	if (nmp->nm_flag & NFSMNT_NFSV4) {
194 		maxio = NFS_MAXBSIZE;
195 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
196 		if (nmp->nm_sotype == SOCK_DGRAM)
197 			maxio = NFS_MAXDGRAMDATA;
198 		else
199 			maxio = NFS_MAXBSIZE;
200 	} else {
201 		maxio = NFS_V2MAXDATA;
202 	}
203 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
204 		nmp->nm_rsize = maxio;
205 	if (nmp->nm_rsize > NFS_MAXBSIZE)
206 		nmp->nm_rsize = NFS_MAXBSIZE;
207 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
208 		nmp->nm_readdirsize = maxio;
209 	if (nmp->nm_readdirsize > nmp->nm_rsize)
210 		nmp->nm_readdirsize = nmp->nm_rsize;
211 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
212 		nmp->nm_wsize = maxio;
213 	if (nmp->nm_wsize > NFS_MAXBSIZE)
214 		nmp->nm_wsize = NFS_MAXBSIZE;
215 
216 	/*
217 	 * Calculate the size used for io buffers.  Use the larger
218 	 * of the two sizes to minimise nfs requests but make sure
219 	 * that it is at least one VM page to avoid wasting buffer
220 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
221 	 * that is the buffer size used for directories.
222 	 */
223 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
224 	iosize = imax(iosize, PAGE_SIZE);
225 	iosize = imax(iosize, NFS_DIRBLKSIZ);
226 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
227 	return (iosize);
228 }
229 
230 static void
231 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
232 {
233 
234 	args->version = NFS_ARGSVERSION;
235 	args->addr = oargs->addr;
236 	args->addrlen = oargs->addrlen;
237 	args->sotype = oargs->sotype;
238 	args->proto = oargs->proto;
239 	args->fh = oargs->fh;
240 	args->fhsize = oargs->fhsize;
241 	args->flags = oargs->flags;
242 	args->wsize = oargs->wsize;
243 	args->rsize = oargs->rsize;
244 	args->readdirsize = oargs->readdirsize;
245 	args->timeo = oargs->timeo;
246 	args->retrans = oargs->retrans;
247 	args->readahead = oargs->readahead;
248 	args->hostname = oargs->hostname;
249 }
250 
251 static void
252 nfs_convert_diskless(void)
253 {
254 
255 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
256 		sizeof(struct ifaliasreq));
257 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
258 		sizeof(struct sockaddr_in));
259 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
260 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
261 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
262 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
263 	} else {
264 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
265 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
266 	}
267 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
268 		sizeof(struct sockaddr_in));
269 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
270 	nfsv3_diskless.root_time = nfs_diskless.root_time;
271 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
272 		MAXHOSTNAMELEN);
273 	nfs_diskless_valid = 3;
274 }
275 
276 /*
277  * nfs statfs call
278  */
279 static int
280 nfs_statfs(struct mount *mp, struct statfs *sbp)
281 {
282 	struct vnode *vp;
283 	struct thread *td;
284 	struct nfsmount *nmp = VFSTONFS(mp);
285 	struct nfsvattr nfsva;
286 	struct nfsfsinfo fs;
287 	struct nfsstatfs sb;
288 	int error = 0, attrflag, gotfsinfo = 0, ret;
289 	struct nfsnode *np;
290 
291 	td = curthread;
292 
293 	error = vfs_busy(mp, MBF_NOWAIT);
294 	if (error)
295 		return (error);
296 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
297 	if (error) {
298 		vfs_unbusy(mp);
299 		return (error);
300 	}
301 	vp = NFSTOV(np);
302 	mtx_lock(&nmp->nm_mtx);
303 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
304 		mtx_unlock(&nmp->nm_mtx);
305 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
306 		    &attrflag, NULL);
307 		if (!error)
308 			gotfsinfo = 1;
309 	} else
310 		mtx_unlock(&nmp->nm_mtx);
311 	if (!error)
312 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
313 		    &attrflag, NULL);
314 	if (error != 0)
315 		NFSCL_DEBUG(2, "statfs=%d\n", error);
316 	if (attrflag == 0) {
317 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
318 		    td->td_ucred, td, &nfsva, NULL, NULL);
319 		if (ret) {
320 			/*
321 			 * Just set default values to get things going.
322 			 */
323 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
324 			nfsva.na_vattr.va_type = VDIR;
325 			nfsva.na_vattr.va_mode = 0777;
326 			nfsva.na_vattr.va_nlink = 100;
327 			nfsva.na_vattr.va_uid = (uid_t)0;
328 			nfsva.na_vattr.va_gid = (gid_t)0;
329 			nfsva.na_vattr.va_fileid = 2;
330 			nfsva.na_vattr.va_gen = 1;
331 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
332 			nfsva.na_vattr.va_size = 512 * 1024;
333 		}
334 	}
335 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
336 	if (!error) {
337 	    mtx_lock(&nmp->nm_mtx);
338 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
339 		nfscl_loadfsinfo(nmp, &fs);
340 	    nfscl_loadsbinfo(nmp, &sb, sbp);
341 	    sbp->f_iosize = newnfs_iosize(nmp);
342 	    mtx_unlock(&nmp->nm_mtx);
343 	    if (sbp != &mp->mnt_stat) {
344 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
345 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
346 	    }
347 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
348 	} else if (NFS_ISV4(vp)) {
349 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
350 	}
351 	vput(vp);
352 	vfs_unbusy(mp);
353 	return (error);
354 }
355 
356 /*
357  * nfs version 3 fsinfo rpc call
358  */
359 int
360 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
361     struct thread *td)
362 {
363 	struct nfsfsinfo fs;
364 	struct nfsvattr nfsva;
365 	int error, attrflag;
366 
367 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
368 	if (!error) {
369 		if (attrflag)
370 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
371 			    1);
372 		mtx_lock(&nmp->nm_mtx);
373 		nfscl_loadfsinfo(nmp, &fs);
374 		mtx_unlock(&nmp->nm_mtx);
375 	}
376 	return (error);
377 }
378 
379 /*
380  * Mount a remote root fs via. nfs. This depends on the info in the
381  * nfs_diskless structure that has been filled in properly by some primary
382  * bootstrap.
383  * It goes something like this:
384  * - do enough of "ifconfig" by calling ifioctl() so that the system
385  *   can talk to the server
386  * - If nfs_diskless.mygateway is filled in, use that address as
387  *   a default gateway.
388  * - build the rootfs mount point and call mountnfs() to do the rest.
389  *
390  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
391  * structure, as well as other global NFS client variables here, as
392  * nfs_mountroot() will be called once in the boot before any other NFS
393  * client activity occurs.
394  */
395 static int
396 nfs_mountroot(struct mount *mp)
397 {
398 	struct thread *td = curthread;
399 	struct nfsv3_diskless *nd = &nfsv3_diskless;
400 	struct socket *so;
401 	struct vnode *vp;
402 	struct ifreq ir;
403 	int error;
404 	u_long l;
405 	char buf[128];
406 	char *cp;
407 
408 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
409 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
410 #elif defined(NFS_ROOT)
411 	nfs_setup_diskless();
412 #endif
413 
414 	if (nfs_diskless_valid == 0)
415 		return (-1);
416 	if (nfs_diskless_valid == 1)
417 		nfs_convert_diskless();
418 
419 	/*
420 	 * Do enough of ifconfig(8) so that the critical net interface can
421 	 * talk to the server.
422 	 */
423 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
424 	    td->td_ucred, td);
425 	if (error)
426 		panic("nfs_mountroot: socreate(%04x): %d",
427 			nd->myif.ifra_addr.sa_family, error);
428 
429 #if 0 /* XXX Bad idea */
430 	/*
431 	 * We might not have been told the right interface, so we pass
432 	 * over the first ten interfaces of the same kind, until we get
433 	 * one of them configured.
434 	 */
435 
436 	for (i = strlen(nd->myif.ifra_name) - 1;
437 		nd->myif.ifra_name[i] >= '0' &&
438 		nd->myif.ifra_name[i] <= '9';
439 		nd->myif.ifra_name[i] ++) {
440 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
441 		if(!error)
442 			break;
443 	}
444 #endif
445 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
446 	if (error)
447 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
448 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
449 		ir.ifr_mtu = strtol(cp, NULL, 10);
450 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
451 		freeenv(cp);
452 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
453 		if (error)
454 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
455 	}
456 	soclose(so);
457 
458 	/*
459 	 * If the gateway field is filled in, set it as the default route.
460 	 * Note that pxeboot will set a default route of 0 if the route
461 	 * is not set by the DHCP server.  Check also for a value of 0
462 	 * to avoid panicking inappropriately in that situation.
463 	 */
464 	if (nd->mygateway.sin_len != 0 &&
465 	    nd->mygateway.sin_addr.s_addr != 0) {
466 		struct sockaddr_in mask, sin;
467 
468 		bzero((caddr_t)&mask, sizeof(mask));
469 		sin = mask;
470 		sin.sin_family = AF_INET;
471 		sin.sin_len = sizeof(sin);
472                 /* XXX MRT use table 0 for this sort of thing */
473 		CURVNET_SET(TD_TO_VNET(td));
474 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
475 		    (struct sockaddr *)&nd->mygateway,
476 		    (struct sockaddr *)&mask,
477 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
478 		CURVNET_RESTORE();
479 		if (error)
480 			panic("nfs_mountroot: RTM_ADD: %d", error);
481 	}
482 
483 	/*
484 	 * Create the rootfs mount point.
485 	 */
486 	nd->root_args.fh = nd->root_fh;
487 	nd->root_args.fhsize = nd->root_fhsize;
488 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
489 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
490 		(l >> 24) & 0xff, (l >> 16) & 0xff,
491 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
492 	printf("NFS ROOT: %s\n", buf);
493 	nd->root_args.hostname = buf;
494 	if ((error = nfs_mountdiskless(buf,
495 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
496 		return (error);
497 	}
498 
499 	/*
500 	 * This is not really an nfs issue, but it is much easier to
501 	 * set hostname here and then let the "/etc/rc.xxx" files
502 	 * mount the right /var based upon its preset value.
503 	 */
504 	mtx_lock(&prison0.pr_mtx);
505 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
506 	    sizeof(prison0.pr_hostname));
507 	mtx_unlock(&prison0.pr_mtx);
508 	inittodr(ntohl(nd->root_time));
509 	return (0);
510 }
511 
512 /*
513  * Internal version of mount system call for diskless setup.
514  */
515 static int
516 nfs_mountdiskless(char *path,
517     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
518     struct vnode **vpp, struct mount *mp)
519 {
520 	struct sockaddr *nam;
521 	int dirlen, error;
522 	char *dirpath;
523 
524 	/*
525 	 * Find the directory path in "path", which also has the server's
526 	 * name/ip address in it.
527 	 */
528 	dirpath = strchr(path, ':');
529 	if (dirpath != NULL)
530 		dirlen = strlen(++dirpath);
531 	else
532 		dirlen = 0;
533 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
534 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
535 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
536 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
537 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
538 		return (error);
539 	}
540 	return (0);
541 }
542 
543 static void
544 nfs_sec_name(char *sec, int *flagsp)
545 {
546 	if (!strcmp(sec, "krb5"))
547 		*flagsp |= NFSMNT_KERB;
548 	else if (!strcmp(sec, "krb5i"))
549 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
550 	else if (!strcmp(sec, "krb5p"))
551 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
552 }
553 
554 static void
555 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
556     const char *hostname, struct ucred *cred, struct thread *td)
557 {
558 	int adjsock;
559 	char *p;
560 
561 	/*
562 	 * Set read-only flag if requested; otherwise, clear it if this is
563 	 * an update.  If this is not an update, then either the read-only
564 	 * flag is already clear, or this is a root mount and it was set
565 	 * intentionally at some previous point.
566 	 */
567 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
568 		MNT_ILOCK(mp);
569 		mp->mnt_flag |= MNT_RDONLY;
570 		MNT_IUNLOCK(mp);
571 	} else if (mp->mnt_flag & MNT_UPDATE) {
572 		MNT_ILOCK(mp);
573 		mp->mnt_flag &= ~MNT_RDONLY;
574 		MNT_IUNLOCK(mp);
575 	}
576 
577 	/*
578 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
579 	 * no sense in that context.  Also, set up appropriate retransmit
580 	 * and soft timeout behavior.
581 	 */
582 	if (argp->sotype == SOCK_STREAM) {
583 		nmp->nm_flag &= ~NFSMNT_NOCONN;
584 		nmp->nm_timeo = NFS_MAXTIMEO;
585 		if ((argp->flags & NFSMNT_NFSV4) != 0)
586 			nmp->nm_retry = INT_MAX;
587 		else
588 			nmp->nm_retry = NFS_RETRANS_TCP;
589 	}
590 
591 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
592 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
593 		argp->flags &= ~NFSMNT_RDIRPLUS;
594 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
595 	}
596 
597 	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
598 	if (nmp->nm_minorvers == 0) {
599 		argp->flags &= ~NFSMNT_ONEOPENOWN;
600 		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
601 	}
602 
603 	/* Re-bind if rsrvd port requested and wasn't on one */
604 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
605 		  && (argp->flags & NFSMNT_RESVPORT);
606 	/* Also re-bind if we're switching to/from a connected UDP socket */
607 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
608 		    (argp->flags & NFSMNT_NOCONN));
609 
610 	/* Update flags atomically.  Don't change the lock bits. */
611 	nmp->nm_flag = argp->flags | nmp->nm_flag;
612 
613 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
614 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
615 		if (nmp->nm_timeo < NFS_MINTIMEO)
616 			nmp->nm_timeo = NFS_MINTIMEO;
617 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
618 			nmp->nm_timeo = NFS_MAXTIMEO;
619 	}
620 
621 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
622 		nmp->nm_retry = argp->retrans;
623 		if (nmp->nm_retry > NFS_MAXREXMIT)
624 			nmp->nm_retry = NFS_MAXREXMIT;
625 	}
626 
627 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
628 		nmp->nm_wsize = argp->wsize;
629 		/*
630 		 * Clip at the power of 2 below the size. There is an
631 		 * issue (not isolated) that causes intermittent page
632 		 * faults if this is not done.
633 		 */
634 		if (nmp->nm_wsize > NFS_FABLKSIZE)
635 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
636 		else
637 			nmp->nm_wsize = NFS_FABLKSIZE;
638 	}
639 
640 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
641 		nmp->nm_rsize = argp->rsize;
642 		/*
643 		 * Clip at the power of 2 below the size. There is an
644 		 * issue (not isolated) that causes intermittent page
645 		 * faults if this is not done.
646 		 */
647 		if (nmp->nm_rsize > NFS_FABLKSIZE)
648 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
649 		else
650 			nmp->nm_rsize = NFS_FABLKSIZE;
651 	}
652 
653 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
654 		nmp->nm_readdirsize = argp->readdirsize;
655 	}
656 
657 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
658 		nmp->nm_acregmin = argp->acregmin;
659 	else
660 		nmp->nm_acregmin = NFS_MINATTRTIMO;
661 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
662 		nmp->nm_acregmax = argp->acregmax;
663 	else
664 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
665 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
666 		nmp->nm_acdirmin = argp->acdirmin;
667 	else
668 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
669 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
670 		nmp->nm_acdirmax = argp->acdirmax;
671 	else
672 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
673 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
674 		nmp->nm_acdirmin = nmp->nm_acdirmax;
675 	if (nmp->nm_acregmin > nmp->nm_acregmax)
676 		nmp->nm_acregmin = nmp->nm_acregmax;
677 
678 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
679 		if (argp->readahead <= NFS_MAXRAHEAD)
680 			nmp->nm_readahead = argp->readahead;
681 		else
682 			nmp->nm_readahead = NFS_MAXRAHEAD;
683 	}
684 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
685 		if (argp->wcommitsize < nmp->nm_wsize)
686 			nmp->nm_wcommitsize = nmp->nm_wsize;
687 		else
688 			nmp->nm_wcommitsize = argp->wcommitsize;
689 	}
690 
691 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
692 		    (nmp->nm_soproto != argp->proto));
693 
694 	if (nmp->nm_client != NULL && adjsock) {
695 		int haslock = 0, error = 0;
696 
697 		if (nmp->nm_sotype == SOCK_STREAM) {
698 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
699 			if (!error)
700 				haslock = 1;
701 		}
702 		if (!error) {
703 		    newnfs_disconnect(&nmp->nm_sockreq);
704 		    if (haslock)
705 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
706 		    nmp->nm_sotype = argp->sotype;
707 		    nmp->nm_soproto = argp->proto;
708 		    if (nmp->nm_sotype == SOCK_DGRAM)
709 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
710 			    cred, td, 0)) {
711 				printf("newnfs_args: retrying connect\n");
712 				(void) nfs_catnap(PSOCK, 0, "nfscon");
713 			}
714 		}
715 	} else {
716 		nmp->nm_sotype = argp->sotype;
717 		nmp->nm_soproto = argp->proto;
718 	}
719 
720 	if (hostname != NULL) {
721 		strlcpy(nmp->nm_hostname, hostname,
722 		    sizeof(nmp->nm_hostname));
723 		p = strchr(nmp->nm_hostname, ':');
724 		if (p != NULL)
725 			*p = '\0';
726 	}
727 }
728 
/*
 * Mount option names accepted by nfs_mount(), which passes this
 * NULL-terminated list to vfs_filteropt().
 */
static const char *nfs_opts[] = {
	"from", "nfs_args", "noac", "noatime", "noexec", "suiddir",
	"nosuid", "nosymfollow", "union", "noclusterr", "noclusterw",
	"multilabel", "acls", "force", "update", "async", "noconn",
	"nolockd", "conn", "lockd", "intr", "rdirplus", "readdirsize",
	"soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
	"retrans", "actimeo", "acregmin", "acregmax", "acdirmin",
	"acdirmax", "resvport", "readahead", "hostname", "timeo",
	"timeout", "addr", "fh", "nfsv3", "sec", "principal", "nfsv4",
	"gssname", "allgssname", "dirpath", "minorversion", "nametimeo",
	"negnametimeo", "nocto", "noncontigwr", "pnfs", "wcommitsize",
	"oneopenown",
	NULL
};
740 
741 /*
742  * Parse the "from" mountarg, passed by the generic mount(8) program
743  * or the mountroot code.  This is used when rerooting into NFS.
744  *
745  * Note that the "hostname" is actually a "hostname:/share/path" string.
746  */
747 static int
748 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
749     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
750 {
751 	char *nam, *delimp, *hostp, *spec;
752 	int error, have_bracket = 0, offset, rv, speclen;
753 	struct sockaddr_in *sin;
754 	size_t len;
755 
756 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
757 	if (error != 0)
758 		return (error);
759 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
760 
761 	/*
762 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
763 	 */
764 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
765 	    *(delimp + 1) == ':') {
766 		hostp = spec + 1;
767 		spec = delimp + 2;
768 		have_bracket = 1;
769 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
770 		hostp = spec;
771 		spec = delimp + 1;
772 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
773 		printf("%s: path@server syntax is deprecated, "
774 		    "use server:path\n", __func__);
775 		hostp = delimp + 1;
776 	} else {
777 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
778 		free(nam, M_TEMP);
779 		return (EINVAL);
780 	}
781 	*delimp = '\0';
782 
783 	/*
784 	 * If there has been a trailing slash at mounttime it seems
785 	 * that some mountd implementations fail to remove the mount
786 	 * entries from their mountlist while unmounting.
787 	 */
788 	for (speclen = strlen(spec);
789 	    speclen > 1 && spec[speclen - 1] == '/';
790 	    speclen--)
791 		spec[speclen - 1] = '\0';
792 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
793 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
794 		free(nam, M_TEMP);
795 		return (EINVAL);
796 	}
797 	/* Make both '@' and ':' notations equal */
798 	if (*hostp != '\0') {
799 		len = strlen(hostp);
800 		offset = 0;
801 		if (have_bracket)
802 			nam[offset++] = '[';
803 		memmove(nam + offset, hostp, len);
804 		if (have_bracket)
805 			nam[len + offset++] = ']';
806 		nam[len + offset++] = ':';
807 		memmove(nam + len + offset, spec, speclen);
808 		nam[len + speclen + offset] = '\0';
809 	} else
810 		nam[0] = '\0';
811 
812 	/*
813 	 * XXX: IPv6
814 	 */
815 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
816 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
817 	if (rv != 1) {
818 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
819 		    __func__, hostp, rv);
820 		free(nam, M_TEMP);
821 		free(sin, M_SONAME);
822 		return (EINVAL);
823 	}
824 
825 	sin->sin_len = sizeof(*sin);
826 	sin->sin_family = AF_INET;
827 	/*
828 	 * XXX: hardcoded port number.
829 	 */
830 	sin->sin_port = htons(2049);
831 
832 	*hostnamep = strdup(nam, M_NEWNFSMNT);
833 	*sinp = sin;
834 	strlcpy(dirpath, spec, dirpathsize);
835 	*dirlenp = strlen(dirpath);
836 
837 	free(nam, M_TEMP);
838 	return (0);
839 }
840 
841 /*
842  * VFS Operations.
843  *
844  * mount system call
845  * It seems a bit dumb to copyinstr() the host and path here and then
846  * bcopy() them in mountnfs(), but I wanted to detect errors before
847  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
848  * an error after that means that I have to release the mbuf.
849  */
850 /* ARGSUSED */
851 static int
852 nfs_mount(struct mount *mp)
853 {
854 	struct nfs_args args = {
855 	    .version = NFS_ARGSVERSION,
856 	    .addr = NULL,
857 	    .addrlen = sizeof (struct sockaddr_in),
858 	    .sotype = SOCK_STREAM,
859 	    .proto = 0,
860 	    .fh = NULL,
861 	    .fhsize = 0,
862 	    .flags = NFSMNT_RESVPORT,
863 	    .wsize = NFS_WSIZE,
864 	    .rsize = NFS_RSIZE,
865 	    .readdirsize = NFS_READDIRSIZE,
866 	    .timeo = 10,
867 	    .retrans = NFS_RETRANS,
868 	    .readahead = NFS_DEFRAHEAD,
869 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
870 	    .hostname = NULL,
871 	    .acregmin = NFS_MINATTRTIMO,
872 	    .acregmax = NFS_MAXATTRTIMO,
873 	    .acdirmin = NFS_MINDIRATTRTIMO,
874 	    .acdirmax = NFS_MAXDIRATTRTIMO,
875 	};
876 	int error = 0, ret, len;
877 	struct sockaddr *nam = NULL;
878 	struct vnode *vp;
879 	struct thread *td;
880 	char *hst;
881 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
882 	char *cp, *opt, *name, *secname;
883 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
884 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
885 	int minvers = 0;
886 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
887 	    krbnamelen, srvkrbnamelen;
888 	size_t hstlen;
889 
890 	has_nfs_args_opt = 0;
891 	has_nfs_from_opt = 0;
892 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
893 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
894 		error = EINVAL;
895 		goto out;
896 	}
897 
898 	td = curthread;
899 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
900 	    nfs_diskless_valid != 0) {
901 		error = nfs_mountroot(mp);
902 		goto out;
903 	}
904 
905 	nfscl_init();
906 
907 	/*
908 	 * The old mount_nfs program passed the struct nfs_args
909 	 * from userspace to kernel.  The new mount_nfs program
910 	 * passes string options via nmount() from userspace to kernel
911 	 * and we populate the struct nfs_args in the kernel.
912 	 */
913 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
914 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
915 		    sizeof(args));
916 		if (error != 0)
917 			goto out;
918 
919 		if (args.version != NFS_ARGSVERSION) {
920 			error = EPROGMISMATCH;
921 			goto out;
922 		}
923 		has_nfs_args_opt = 1;
924 	}
925 
926 	/* Handle the new style options. */
927 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
928 		args.acdirmin = args.acdirmax =
929 		    args.acregmin = args.acregmax = 0;
930 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
931 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
932 	}
933 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
934 		args.flags |= NFSMNT_NOCONN;
935 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
936 		args.flags &= ~NFSMNT_NOCONN;
937 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
938 		args.flags |= NFSMNT_NOLOCKD;
939 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
940 		args.flags &= ~NFSMNT_NOLOCKD;
941 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
942 		args.flags |= NFSMNT_INT;
943 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
944 		args.flags |= NFSMNT_RDIRPLUS;
945 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
946 		args.flags |= NFSMNT_RESVPORT;
947 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
948 		args.flags &= ~NFSMNT_RESVPORT;
949 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
950 		args.flags |= NFSMNT_SOFT;
951 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
952 		args.flags &= ~NFSMNT_SOFT;
953 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
954 		args.sotype = SOCK_DGRAM;
955 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
956 		args.sotype = SOCK_DGRAM;
957 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
958 		args.sotype = SOCK_STREAM;
959 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
960 		args.flags |= NFSMNT_NFSV3;
961 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
962 		args.flags |= NFSMNT_NFSV4;
963 		args.sotype = SOCK_STREAM;
964 	}
965 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
966 		args.flags |= NFSMNT_ALLGSSNAME;
967 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
968 		args.flags |= NFSMNT_NOCTO;
969 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
970 		args.flags |= NFSMNT_NONCONTIGWR;
971 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
972 		args.flags |= NFSMNT_PNFS;
973 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
974 		args.flags |= NFSMNT_ONEOPENOWN;
975 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
976 		if (opt == NULL) {
977 			vfs_mount_error(mp, "illegal readdirsize");
978 			error = EINVAL;
979 			goto out;
980 		}
981 		ret = sscanf(opt, "%d", &args.readdirsize);
982 		if (ret != 1 || args.readdirsize <= 0) {
983 			vfs_mount_error(mp, "illegal readdirsize: %s",
984 			    opt);
985 			error = EINVAL;
986 			goto out;
987 		}
988 		args.flags |= NFSMNT_READDIRSIZE;
989 	}
990 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
991 		if (opt == NULL) {
992 			vfs_mount_error(mp, "illegal readahead");
993 			error = EINVAL;
994 			goto out;
995 		}
996 		ret = sscanf(opt, "%d", &args.readahead);
997 		if (ret != 1 || args.readahead <= 0) {
998 			vfs_mount_error(mp, "illegal readahead: %s",
999 			    opt);
1000 			error = EINVAL;
1001 			goto out;
1002 		}
1003 		args.flags |= NFSMNT_READAHEAD;
1004 	}
1005 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1006 		if (opt == NULL) {
1007 			vfs_mount_error(mp, "illegal wsize");
1008 			error = EINVAL;
1009 			goto out;
1010 		}
1011 		ret = sscanf(opt, "%d", &args.wsize);
1012 		if (ret != 1 || args.wsize <= 0) {
1013 			vfs_mount_error(mp, "illegal wsize: %s",
1014 			    opt);
1015 			error = EINVAL;
1016 			goto out;
1017 		}
1018 		args.flags |= NFSMNT_WSIZE;
1019 	}
1020 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1021 		if (opt == NULL) {
1022 			vfs_mount_error(mp, "illegal rsize");
1023 			error = EINVAL;
1024 			goto out;
1025 		}
1026 		ret = sscanf(opt, "%d", &args.rsize);
1027 		if (ret != 1 || args.rsize <= 0) {
1028 			vfs_mount_error(mp, "illegal wsize: %s",
1029 			    opt);
1030 			error = EINVAL;
1031 			goto out;
1032 		}
1033 		args.flags |= NFSMNT_RSIZE;
1034 	}
1035 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1036 		if (opt == NULL) {
1037 			vfs_mount_error(mp, "illegal retrans");
1038 			error = EINVAL;
1039 			goto out;
1040 		}
1041 		ret = sscanf(opt, "%d", &args.retrans);
1042 		if (ret != 1 || args.retrans <= 0) {
1043 			vfs_mount_error(mp, "illegal retrans: %s",
1044 			    opt);
1045 			error = EINVAL;
1046 			goto out;
1047 		}
1048 		args.flags |= NFSMNT_RETRANS;
1049 	}
1050 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1051 		ret = sscanf(opt, "%d", &args.acregmin);
1052 		if (ret != 1 || args.acregmin < 0) {
1053 			vfs_mount_error(mp, "illegal actimeo: %s",
1054 			    opt);
1055 			error = EINVAL;
1056 			goto out;
1057 		}
1058 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1059 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1060 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1061 	}
1062 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1063 		ret = sscanf(opt, "%d", &args.acregmin);
1064 		if (ret != 1 || args.acregmin < 0) {
1065 			vfs_mount_error(mp, "illegal acregmin: %s",
1066 			    opt);
1067 			error = EINVAL;
1068 			goto out;
1069 		}
1070 		args.flags |= NFSMNT_ACREGMIN;
1071 	}
1072 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1073 		ret = sscanf(opt, "%d", &args.acregmax);
1074 		if (ret != 1 || args.acregmax < 0) {
1075 			vfs_mount_error(mp, "illegal acregmax: %s",
1076 			    opt);
1077 			error = EINVAL;
1078 			goto out;
1079 		}
1080 		args.flags |= NFSMNT_ACREGMAX;
1081 	}
1082 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1083 		ret = sscanf(opt, "%d", &args.acdirmin);
1084 		if (ret != 1 || args.acdirmin < 0) {
1085 			vfs_mount_error(mp, "illegal acdirmin: %s",
1086 			    opt);
1087 			error = EINVAL;
1088 			goto out;
1089 		}
1090 		args.flags |= NFSMNT_ACDIRMIN;
1091 	}
1092 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1093 		ret = sscanf(opt, "%d", &args.acdirmax);
1094 		if (ret != 1 || args.acdirmax < 0) {
1095 			vfs_mount_error(mp, "illegal acdirmax: %s",
1096 			    opt);
1097 			error = EINVAL;
1098 			goto out;
1099 		}
1100 		args.flags |= NFSMNT_ACDIRMAX;
1101 	}
1102 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1103 		ret = sscanf(opt, "%d", &args.wcommitsize);
1104 		if (ret != 1 || args.wcommitsize < 0) {
1105 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1106 			error = EINVAL;
1107 			goto out;
1108 		}
1109 		args.flags |= NFSMNT_WCOMMITSIZE;
1110 	}
1111 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1112 		ret = sscanf(opt, "%d", &args.timeo);
1113 		if (ret != 1 || args.timeo <= 0) {
1114 			vfs_mount_error(mp, "illegal timeo: %s",
1115 			    opt);
1116 			error = EINVAL;
1117 			goto out;
1118 		}
1119 		args.flags |= NFSMNT_TIMEO;
1120 	}
1121 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1122 		ret = sscanf(opt, "%d", &args.timeo);
1123 		if (ret != 1 || args.timeo <= 0) {
1124 			vfs_mount_error(mp, "illegal timeout: %s",
1125 			    opt);
1126 			error = EINVAL;
1127 			goto out;
1128 		}
1129 		args.flags |= NFSMNT_TIMEO;
1130 	}
1131 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1132 		ret = sscanf(opt, "%d", &nametimeo);
1133 		if (ret != 1 || nametimeo < 0) {
1134 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1135 			error = EINVAL;
1136 			goto out;
1137 		}
1138 	}
1139 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1140 	    == 0) {
1141 		ret = sscanf(opt, "%d", &negnametimeo);
1142 		if (ret != 1 || negnametimeo < 0) {
1143 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1144 			    opt);
1145 			error = EINVAL;
1146 			goto out;
1147 		}
1148 	}
1149 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1150 	    0) {
1151 		ret = sscanf(opt, "%d", &minvers);
1152 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1153 		    (args.flags & NFSMNT_NFSV4) == 0) {
1154 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1155 			error = EINVAL;
1156 			goto out;
1157 		}
1158 	}
1159 	if (vfs_getopt(mp->mnt_optnew, "sec",
1160 		(void **) &secname, NULL) == 0)
1161 		nfs_sec_name(secname, &args.flags);
1162 
1163 	if (mp->mnt_flag & MNT_UPDATE) {
1164 		struct nfsmount *nmp = VFSTONFS(mp);
1165 
1166 		if (nmp == NULL) {
1167 			error = EIO;
1168 			goto out;
1169 		}
1170 
1171 		/*
1172 		 * If a change from TCP->UDP is done and there are thread(s)
1173 		 * that have I/O RPC(s) in progress with a transfer size
1174 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1175 		 * hung, retrying the RPC(s) forever. Usually these threads
1176 		 * will be seen doing an uninterruptible sleep on wait channel
1177 		 * "nfsreq".
1178 		 */
1179 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1180 			tprintf(td->td_proc, LOG_WARNING,
1181 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1182 
1183 		/*
1184 		 * When doing an update, we can't change version,
1185 		 * security, switch lockd strategies, change cookie
1186 		 * translation or switch oneopenown.
1187 		 */
1188 		args.flags = (args.flags &
1189 		    ~(NFSMNT_NFSV3 |
1190 		      NFSMNT_NFSV4 |
1191 		      NFSMNT_KERB |
1192 		      NFSMNT_INTEGRITY |
1193 		      NFSMNT_PRIVACY |
1194 		      NFSMNT_ONEOPENOWN |
1195 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1196 		    (nmp->nm_flag &
1197 			(NFSMNT_NFSV3 |
1198 			 NFSMNT_NFSV4 |
1199 			 NFSMNT_KERB |
1200 			 NFSMNT_INTEGRITY |
1201 			 NFSMNT_PRIVACY |
1202 			 NFSMNT_ONEOPENOWN |
1203 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1204 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1205 		goto out;
1206 	}
1207 
1208 	/*
1209 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1210 	 * or no-connection mode for those protocols that support
1211 	 * no-connection mode (the flag will be cleared later for protocols
1212 	 * that do not support no-connection mode).  This will allow a client
1213 	 * to receive replies from a different IP then the request was
1214 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1215 	 * not 0.
1216 	 */
1217 	if (nfs_ip_paranoia == 0)
1218 		args.flags |= NFSMNT_NOCONN;
1219 
1220 	if (has_nfs_args_opt != 0) {
1221 		/*
1222 		 * In the 'nfs_args' case, the pointers in the args
1223 		 * structure are in userland - we copy them in here.
1224 		 */
1225 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1226 			vfs_mount_error(mp, "Bad file handle");
1227 			error = EINVAL;
1228 			goto out;
1229 		}
1230 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1231 		    args.fhsize);
1232 		if (error != 0)
1233 			goto out;
1234 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1235 		if (error != 0)
1236 			goto out;
1237 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1238 		args.hostname = hst;
1239 		/* getsockaddr() call must be after above copyin() calls */
1240 		error = getsockaddr(&nam, (caddr_t)args.addr,
1241 		    args.addrlen);
1242 		if (error != 0)
1243 			goto out;
1244 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1245 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1246 	    sizeof(dirpath), &dirlen) == 0) {
1247 		has_nfs_from_opt = 1;
1248 		bcopy(args.hostname, hst, MNAMELEN);
1249 		hst[MNAMELEN - 1] = '\0';
1250 
1251 		/*
1252 		 * This only works with NFSv4 for now.
1253 		 */
1254 		args.fhsize = 0;
1255 		args.flags |= NFSMNT_NFSV4;
1256 		args.sotype = SOCK_STREAM;
1257 	} else {
1258 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1259 		    &args.fhsize) == 0) {
1260 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1261 				vfs_mount_error(mp, "Bad file handle");
1262 				error = EINVAL;
1263 				goto out;
1264 			}
1265 			bcopy(args.fh, nfh, args.fhsize);
1266 		} else {
1267 			args.fhsize = 0;
1268 		}
1269 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1270 		    (void **)&args.hostname, &len);
1271 		if (args.hostname == NULL) {
1272 			vfs_mount_error(mp, "Invalid hostname");
1273 			error = EINVAL;
1274 			goto out;
1275 		}
1276 		if (len >= MNAMELEN) {
1277 			vfs_mount_error(mp, "Hostname too long");
1278 			error = EINVAL;
1279 			goto out;
1280 		}
1281 		bcopy(args.hostname, hst, len);
1282 		hst[len] = '\0';
1283 	}
1284 
1285 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1286 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1287 	else {
1288 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1289 		cp = strchr(srvkrbname, ':');
1290 		if (cp != NULL)
1291 			*cp = '\0';
1292 	}
1293 	srvkrbnamelen = strlen(srvkrbname);
1294 
1295 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1296 		strlcpy(krbname, name, sizeof (krbname));
1297 	else
1298 		krbname[0] = '\0';
1299 	krbnamelen = strlen(krbname);
1300 
1301 	if (has_nfs_from_opt == 0) {
1302 		if (vfs_getopt(mp->mnt_optnew,
1303 		    "dirpath", (void **)&name, NULL) == 0)
1304 			strlcpy(dirpath, name, sizeof (dirpath));
1305 		else
1306 			dirpath[0] = '\0';
1307 		dirlen = strlen(dirpath);
1308 	}
1309 
1310 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1311 		if (vfs_getopt(mp->mnt_optnew, "addr",
1312 		    (void **)&args.addr, &args.addrlen) == 0) {
1313 			if (args.addrlen > SOCK_MAXADDRLEN) {
1314 				error = ENAMETOOLONG;
1315 				goto out;
1316 			}
1317 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1318 			bcopy(args.addr, nam, args.addrlen);
1319 			nam->sa_len = args.addrlen;
1320 		} else {
1321 			vfs_mount_error(mp, "No server address");
1322 			error = EINVAL;
1323 			goto out;
1324 		}
1325 	}
1326 
1327 	args.fh = nfh;
1328 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1329 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1330 	    nametimeo, negnametimeo, minvers);
1331 out:
1332 	if (!error) {
1333 		MNT_ILOCK(mp);
1334 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1335 		    MNTK_USES_BCACHE;
1336 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1337 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1338 		MNT_IUNLOCK(mp);
1339 	}
1340 	free(hst, M_TEMP);
1341 	return (error);
1342 }
1343 
1344 
1345 /*
1346  * VFS Operations.
1347  *
1348  * mount system call
1349  * It seems a bit dumb to copyinstr() the host and path here and then
1350  * bcopy() them in mountnfs(), but I wanted to detect errors before
1351  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1352  * an error after that means that I have to release the mbuf.
1353  */
1354 /* ARGSUSED */
1355 static int
1356 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1357 {
1358 	int error;
1359 	struct nfs_args args;
1360 
1361 	error = copyin(data, &args, sizeof (struct nfs_args));
1362 	if (error)
1363 		return error;
1364 
1365 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1366 
1367 	error = kernel_mount(ma, flags);
1368 	return (error);
1369 }
1370 
1371 /*
1372  * Common code for mount and mountroot
 *
 * Builds a new struct nfsmount for "mp", connects to the server and
 * obtains the root vnode.
 *
 * argp		mount arguments; flags, fh/fhsize, sotype and proto are read
 *		here and the structure is handed to nfs_decode_args().
 * nam		server address.  Stored in nm_nam on success; freed with
 *		M_SONAME for MNT_UPDATE and on every failure path.
 * hst		MNAMELEN bytes are copied to mp->mnt_stat.f_mntfromname.
 * krbname, dirpath, srvkrbname (and their lengths)
 *		copied into the name area that follows the nfsmount.
 * vpp		on success, receives the root vnode (referenced, unlocked).
 * cred, td	credentials and thread used for the connect and the RPCs.
 * nametimeo, negnametimeo
 *		defaults stored before nfs_decode_args() runs.
 * minvers	NFSv4 minor version; used only when NFSMNT_NFSV4 is set.
 *
 * Returns 0 on success (and for MNT_UPDATE, which is not handled here),
 * otherwise an error number.  EIO is returned when no root file handle
 * is available at the end of the setup.
1373  */
1374 static int
1375 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1376     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1377     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1378     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1379     int minvers)
1380 {
1381 	struct nfsmount *nmp;
1382 	struct nfsnode *np;
1383 	int error, trycnt, ret;
1384 	struct nfsvattr nfsva;
1385 	struct nfsclclient *clp;
1386 	struct nfsclds *dsp, *tdsp;
1387 	uint32_t lease;
	/* Static: shared by all mounts, seeded on first use below. */
1388 	static u_int64_t clval = 0;
1389 
1390 	NFSCL_DEBUG(3, "in mnt\n");
1391 	clp = NULL;
1392 	if (mp->mnt_flag & MNT_UPDATE) {
1393 		nmp = VFSTONFS(mp);
1394 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1395 		FREE(nam, M_SONAME);
1396 		return (0);
1397 	} else {
		/*
		 * The bytes allocated after the structure hold the three
		 * names copied in below.
		 * NOTE(review): the terminators written below need
		 * krbnamelen + dirlen + srvkrbnamelen + 3 bytes of nm_name[];
		 * presumably nm_name[] itself supplies the remaining byte --
		 * confirm against the struct nfsmount declaration.
		 */
1398 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1399 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1400 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1401 		TAILQ_INIT(&nmp->nm_bufq);
1402 		TAILQ_INIT(&nmp->nm_sess);
		/*
		 * Seed the counter from nfsboottime the first time through;
		 * each new mount then takes the next value.
		 */
1403 		if (clval == 0)
1404 			clval = (u_int64_t)nfsboottime.tv_sec;
1405 		nmp->nm_clval = clval++;
1406 		nmp->nm_krbnamelen = krbnamelen;
1407 		nmp->nm_dirpathlen = dirlen;
1408 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1409 		if (td->td_ucred->cr_uid != (uid_t)0) {
1410 			/*
1411 			 * nm_uid is used to get KerberosV credentials for
1412 			 * the nfsv4 state handling operations if there is
1413 			 * no host based principal set. Use the uid of
1414 			 * this user if not root, since they are doing the
1415 			 * mount. I don't think setting this for root will
1416 			 * work, since root normally does not have user
1417 			 * credentials in a credentials cache.
1418 			 */
1419 			nmp->nm_uid = td->td_ucred->cr_uid;
1420 		} else {
1421 			/*
1422 			 * Just set to -1, so it won't be used.
1423 			 */
1424 			nmp->nm_uid = (uid_t)-1;
1425 		}
1426 
1427 		/* Copy and null terminate all the names */
1428 		if (nmp->nm_krbnamelen > 0) {
1429 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1430 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1431 		}
1432 		if (nmp->nm_dirpathlen > 0) {
1433 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1434 			    nmp->nm_dirpathlen);
1435 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1436 			    + 1] = '\0';
1437 		}
1438 		if (nmp->nm_srvkrbnamelen > 0) {
1439 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1440 			    nmp->nm_srvkrbnamelen);
1441 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1442 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1443 		}
		/* Take a reference on cred; dropped by crfree() at "bad". */
1444 		nmp->nm_sockreq.nr_cred = crhold(cred);
1445 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1446 		mp->mnt_data = nmp;
1447 		nmp->nm_getinfo = nfs_getnlminfo;
1448 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1449 	}
1450 	vfs_getnewfsid(mp);
1451 	nmp->nm_mountp = mp;
1452 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1453 
1454 	/*
1455 	 * Since nfs_decode_args() might optionally set them, these
1456 	 * need to be set to defaults before the call, so that the
1457 	 * optional settings aren't overwritten.
1458 	 */
1459 	nmp->nm_nametimeo = nametimeo;
1460 	nmp->nm_negnametimeo = negnametimeo;
1461 	nmp->nm_timeo = NFS_TIMEO;
1462 	nmp->nm_retry = NFS_RETRANS;
1463 	nmp->nm_readahead = NFS_DEFRAHEAD;
1464 
1465 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1466 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1467 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1468 		nmp->nm_wcommitsize *= 2;
1469 	nmp->nm_wcommitsize *= 256;
1470 
	/* The minor version only applies to NFSv4 mounts. */
1471 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1472 		nmp->nm_minorvers = minvers;
1473 	else
1474 		nmp->nm_minorvers = 0;
1475 
1476 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1477 
1478 	/*
1479 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1480 	 * high, depending on whether we end up with negative offsets in
1481 	 * the client or server somewhere.  2GB-1 may be safer.
1482 	 *
1483 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1484 	 * that we can handle until we find out otherwise.
1485 	 */
1486 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1487 		nmp->nm_maxfilesize = 0xffffffffLL;
1488 	else
1489 		nmp->nm_maxfilesize = OFF_MAX;
1490 
	/* Neither V3 nor V4 flag set (V2): use the default transfer sizes. */
1491 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1492 		nmp->nm_wsize = NFS_WSIZE;
1493 		nmp->nm_rsize = NFS_RSIZE;
1494 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1495 	}
1496 	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are clamped to 0. */
1497 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1498 	if (nmp->nm_tprintf_delay < 0)
1499 		nmp->nm_tprintf_delay = 0;
1500 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1501 	if (nmp->nm_tprintf_initial_delay < 0)
1502 		nmp->nm_tprintf_initial_delay = 0;
1503 	nmp->nm_fhsize = argp->fhsize;
1504 	if (nmp->nm_fhsize > 0)
1505 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1506 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1507 	nmp->nm_nam = nam;
1508 	/* Set up the sockets and per-host congestion */
1509 	nmp->nm_sotype = argp->sotype;
1510 	nmp->nm_soproto = argp->proto;
1511 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1512 	if ((argp->flags & NFSMNT_NFSV4))
1513 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1514 	else if ((argp->flags & NFSMNT_NFSV3))
1515 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1516 	else
1517 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1518 
1519 
1520 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1521 		goto bad;
1522 	/* For NFSv4.1, get the clientid now. */
1523 	if (nmp->nm_minorvers > 0) {
1524 		NFSCL_DEBUG(3, "at getcl\n");
1525 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1526 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1527 		if (error != 0)
1528 			goto bad;
1529 	}
1530 
1531 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1532 	    nmp->nm_dirpathlen > 0) {
1533 		NFSCL_DEBUG(3, "in dirp\n");
1534 		/*
1535 		 * If the fhsize on the mount point == 0 for V4, the mount
1536 		 * path needs to be looked up.
1537 		 */
		/* Up to 3 attempts, with a nfs_catnap() after each failure. */
1538 		trycnt = 3;
1539 		do {
1540 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1541 			    cred, td);
1542 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1543 			if (error)
1544 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1545 		} while (error && --trycnt > 0);
1546 		if (error) {
1547 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1548 			goto bad;
1549 		}
1550 	}
1551 
1552 	/*
1553 	 * A reference count is needed on the nfsnode representing the
1554 	 * remote root.  If this object is not persistent, then backward
1555 	 * traversals of the mount point (i.e. "..") will not work if
1556 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1557 	 * this problem, because one can identify root inodes by their
1558 	 * number == UFS_ROOTINO (2).
1559 	 */
1560 	if (nmp->nm_fhsize > 0) {
1561 		/*
1562 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1563 		 * non-zero for the root vnode. f_iosize will be set correctly
1564 		 * by nfs_statfs() before any I/O occurs.
1565 		 */
1566 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1567 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1568 		    LK_EXCLUSIVE);
1569 		if (error)
1570 			goto bad;
1571 		*vpp = NFSTOV(np);
1572 
1573 		/*
1574 		 * Get file attributes and transfer parameters for the
1575 		 * mountpoint.  This has the side effect of filling in
1576 		 * (*vpp)->v_type with the correct value.
1577 		 */
		/* A failure here is not fatal; "ret" is checked again below. */
1578 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1579 		    cred, td, &nfsva, NULL, &lease);
1580 		if (ret) {
1581 			/*
1582 			 * Just set default values to get things going.
1583 			 */
1584 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1585 			nfsva.na_vattr.va_type = VDIR;
1586 			nfsva.na_vattr.va_mode = 0777;
1587 			nfsva.na_vattr.va_nlink = 100;
1588 			nfsva.na_vattr.va_uid = (uid_t)0;
1589 			nfsva.na_vattr.va_gid = (gid_t)0;
1590 			nfsva.na_vattr.va_fileid = 2;
1591 			nfsva.na_vattr.va_gen = 1;
1592 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1593 			nfsva.na_vattr.va_size = 512 * 1024;
1594 			lease = 60;
1595 		}
1596 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		/* clp was obtained by nfscl_getcl() above for minorvers > 0. */
1597 		if (nmp->nm_minorvers > 0) {
1598 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1599 			NFSLOCKCLSTATE();
1600 			clp->nfsc_renew = NFSCL_RENEW(lease);
1601 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping 0 if it wraps. */
1602 			clp->nfsc_clientidrev++;
1603 			if (clp->nfsc_clientidrev == 0)
1604 				clp->nfsc_clientidrev++;
1605 			NFSUNLOCKCLSTATE();
1606 			/*
1607 			 * Mount will succeed, so the renew thread can be
1608 			 * started now.
1609 			 */
1610 			nfscl_start_renewthread(clp);
1611 			nfscl_clientrelease(clp);
1612 		}
1613 		if (argp->flags & NFSMNT_NFSV3)
1614 			ncl_fsinfo(nmp, *vpp, cred, td);
1615 
1616 		/* Mark if the mount point supports NFSv4 ACLs. */
1617 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1618 		    ret == 0 &&
1619 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1620 			MNT_ILOCK(mp);
1621 			mp->mnt_flag |= MNT_NFS4ACLS;
1622 			MNT_IUNLOCK(mp);
1623 		}
1624 
1625 		/*
1626 		 * Lose the lock but keep the ref.
1627 		 */
1628 		NFSVOPUNLOCK(*vpp, 0);
1629 		return (0);
1630 	}
	/* Reached only when nm_fhsize is not > 0: no root file handle. */
1631 	error = EIO;
1632 
	/*
	 * Error exit: release what was set up above, then free nmp and nam.
	 * NOTE(review): mp->mnt_data was set to nmp above and is not
	 * cleared here, so it is left pointing at freed memory on return.
	 */
1633 bad:
1634 	if (clp != NULL)
1635 		nfscl_clientrelease(clp);
1636 	newnfs_disconnect(&nmp->nm_sockreq);
1637 	crfree(nmp->nm_sockreq.nr_cred);
1638 	if (nmp->nm_sockreq.nr_auth != NULL)
1639 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1640 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1641 	mtx_destroy(&nmp->nm_mtx);
1642 	if (nmp->nm_clp != NULL) {
1643 		NFSLOCKCLSTATE();
1644 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1645 		NFSUNLOCKCLSTATE();
1646 		free(nmp->nm_clp, M_NFSCLCLIENT);
1647 	}
	/*
	 * Free every session entry.  The first entry's socket is not
	 * disconnected here -- presumably it refers to nm_sockreq, which
	 * was disconnected above; TODO confirm.
	 */
1648 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1649 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1650 		    dsp->nfsclds_sockp != NULL)
1651 			newnfs_disconnect(dsp->nfsclds_sockp);
1652 		nfscl_freenfsclds(dsp);
1653 	}
1654 	FREE(nmp, M_NEWNFSMNT);
1655 	FREE(nam, M_SONAME);
1656 	return (error);
1657 }
1658 
1659 /*
1660  * unmount system call
1661  */
1662 static int
1663 nfs_unmount(struct mount *mp, int mntflags)
1664 {
1665 	struct thread *td;
1666 	struct nfsmount *nmp;
1667 	int error, flags = 0, i, trycnt = 0;
1668 	struct nfsclds *dsp, *tdsp;
1669 
1670 	td = curthread;
1671 
1672 	if (mntflags & MNT_FORCE)
1673 		flags |= FORCECLOSE;
1674 	nmp = VFSTONFS(mp);
1675 	/*
1676 	 * Goes something like this..
1677 	 * - Call vflush() to clear out vnodes for this filesystem
1678 	 * - Close the socket
1679 	 * - Free up the data structures
1680 	 */
1681 	/* In the forced case, cancel any outstanding requests. */
1682 	if (mntflags & MNT_FORCE) {
1683 		error = newnfs_nmcancelreqs(nmp);
1684 		if (error)
1685 			goto out;
1686 		/* For a forced close, get rid of the renew thread now */
1687 		nfscl_umount(nmp, td);
1688 	}
1689 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1690 	do {
1691 		error = vflush(mp, 1, flags, td);
1692 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1693 			(void) nfs_catnap(PSOCK, error, "newndm");
1694 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1695 	if (error)
1696 		goto out;
1697 
1698 	/*
1699 	 * We are now committed to the unmount.
1700 	 */
1701 	if ((mntflags & MNT_FORCE) == 0)
1702 		nfscl_umount(nmp, td);
1703 	else {
1704 		mtx_lock(&nmp->nm_mtx);
1705 		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1706 		mtx_unlock(&nmp->nm_mtx);
1707 	}
1708 	/* Make sure no nfsiods are assigned to this mount. */
1709 	mtx_lock(&ncl_iod_mutex);
1710 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1711 		if (ncl_iodmount[i] == nmp) {
1712 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1713 			ncl_iodmount[i] = NULL;
1714 		}
1715 	mtx_unlock(&ncl_iod_mutex);
1716 
1717 	/*
1718 	 * We can now set mnt_data to NULL and wait for
1719 	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1720 	 */
1721 	mtx_lock(&mountlist_mtx);
1722 	mtx_lock(&nmp->nm_mtx);
1723 	mp->mnt_data = NULL;
1724 	mtx_unlock(&mountlist_mtx);
1725 	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1726 		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1727 	mtx_unlock(&nmp->nm_mtx);
1728 
1729 	newnfs_disconnect(&nmp->nm_sockreq);
1730 	crfree(nmp->nm_sockreq.nr_cred);
1731 	FREE(nmp->nm_nam, M_SONAME);
1732 	if (nmp->nm_sockreq.nr_auth != NULL)
1733 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1734 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1735 	mtx_destroy(&nmp->nm_mtx);
1736 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1737 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1738 		    dsp->nfsclds_sockp != NULL)
1739 			newnfs_disconnect(dsp->nfsclds_sockp);
1740 		nfscl_freenfsclds(dsp);
1741 	}
1742 	FREE(nmp, M_NEWNFSMNT);
1743 out:
1744 	return (error);
1745 }
1746 
1747 /*
1748  * Return root of a filesystem
1749  */
1750 static int
1751 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1752 {
1753 	struct vnode *vp;
1754 	struct nfsmount *nmp;
1755 	struct nfsnode *np;
1756 	int error;
1757 
1758 	nmp = VFSTONFS(mp);
1759 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1760 	if (error)
1761 		return error;
1762 	vp = NFSTOV(np);
1763 	/*
1764 	 * Get transfer parameters and attributes for root vnode once.
1765 	 */
1766 	mtx_lock(&nmp->nm_mtx);
1767 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1768 		mtx_unlock(&nmp->nm_mtx);
1769 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1770 	} else
1771 		mtx_unlock(&nmp->nm_mtx);
1772 	if (vp->v_type == VNON)
1773 	    vp->v_type = VDIR;
1774 	vp->v_vflag |= VV_ROOT;
1775 	*vpp = vp;
1776 	return (0);
1777 }
1778 
1779 /*
1780  * Flush out the buffer cache
1781  */
1782 /* ARGSUSED */
1783 static int
1784 nfs_sync(struct mount *mp, int waitfor)
1785 {
1786 	struct vnode *vp, *mvp;
1787 	struct thread *td;
1788 	int error, allerror = 0;
1789 
1790 	td = curthread;
1791 
1792 	MNT_ILOCK(mp);
1793 	/*
1794 	 * If a forced dismount is in progress, return from here so that
1795 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1796 	 * calling VFS_UNMOUNT().
1797 	 */
1798 	if (NFSCL_FORCEDISM(mp)) {
1799 		MNT_IUNLOCK(mp);
1800 		return (EBADF);
1801 	}
1802 	MNT_IUNLOCK(mp);
1803 
1804 	/*
1805 	 * Force stale buffer cache information to be flushed.
1806 	 */
1807 loop:
1808 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1809 		/* XXX Racy bv_cnt check. */
1810 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1811 		    waitfor == MNT_LAZY) {
1812 			VI_UNLOCK(vp);
1813 			continue;
1814 		}
1815 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1816 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1817 			goto loop;
1818 		}
1819 		error = VOP_FSYNC(vp, waitfor, td);
1820 		if (error)
1821 			allerror = error;
1822 		NFSVOPUNLOCK(vp, 0);
1823 		vrele(vp);
1824 	}
1825 	return (allerror);
1826 }
1827 
1828 static int
1829 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1830 {
1831 	struct nfsmount *nmp = VFSTONFS(mp);
1832 	struct vfsquery vq;
1833 	int error;
1834 
1835 	bzero(&vq, sizeof(vq));
1836 	switch (op) {
1837 #if 0
1838 	case VFS_CTL_NOLOCKS:
1839 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1840  		if (req->oldptr != NULL) {
1841  			error = SYSCTL_OUT(req, &val, sizeof(val));
1842  			if (error)
1843  				return (error);
1844  		}
1845  		if (req->newptr != NULL) {
1846  			error = SYSCTL_IN(req, &val, sizeof(val));
1847  			if (error)
1848  				return (error);
1849 			if (val)
1850 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1851 			else
1852 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1853  		}
1854 		break;
1855 #endif
1856 	case VFS_CTL_QUERY:
1857 		mtx_lock(&nmp->nm_mtx);
1858 		if (nmp->nm_state & NFSSTA_TIMEO)
1859 			vq.vq_flags |= VQ_NOTRESP;
1860 		mtx_unlock(&nmp->nm_mtx);
1861 #if 0
1862 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1863 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1864 			vq.vq_flags |= VQ_NOTRESPLOCK;
1865 #endif
1866 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1867 		break;
1868  	case VFS_CTL_TIMEO:
1869  		if (req->oldptr != NULL) {
1870  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1871  			    sizeof(nmp->nm_tprintf_initial_delay));
1872  			if (error)
1873  				return (error);
1874  		}
1875  		if (req->newptr != NULL) {
1876 			error = vfs_suser(mp, req->td);
1877 			if (error)
1878 				return (error);
1879  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1880  			    sizeof(nmp->nm_tprintf_initial_delay));
1881  			if (error)
1882  				return (error);
1883  			if (nmp->nm_tprintf_initial_delay < 0)
1884  				nmp->nm_tprintf_initial_delay = 0;
1885  		}
1886 		break;
1887 	default:
1888 		return (ENOTSUP);
1889 	}
1890 	return (0);
1891 }
1892 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The result of newnfs_nmcancelreqs() is not reported to the caller.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1905 
1906 /*
1907  * Extract the information needed by the nlm from the nfs vnode.
1908  */
1909 static void
1910 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1911     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1912     struct timeval *timeop)
1913 {
1914 	struct nfsmount *nmp;
1915 	struct nfsnode *np = VTONFS(vp);
1916 
1917 	nmp = VFSTONFS(vp->v_mount);
1918 	if (fhlenp != NULL)
1919 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1920 	if (fhp != NULL)
1921 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1922 	if (sp != NULL)
1923 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1924 	if (is_v3p != NULL)
1925 		*is_v3p = NFS_ISV3(vp);
1926 	if (sizep != NULL)
1927 		*sizep = np->n_size;
1928 	if (timeop != NULL) {
1929 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1930 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1931 	}
1932 }
1933 
/*
 * Conditionally append an option name to the string being built.
 *
 * The name "opt" is copied to *buf only when testval is non-zero and the
 * remaining space *blen is large enough for the name plus its terminating
 * NUL.  After a copy, *buf is advanced past the name and *blen is reduced
 * by the same amount, so the next option is appended right behind it.
 * The nmp argument is not used here.
 */
static __inline void
nfscl_printopt(struct nfsmount *nmp, int testval, char *opt, char **buf,
    size_t *blen)
{
	size_t optlen;
	int len;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	len = snprintf(*buf, *blen, "%s", opt);
	/* Cannot differ given the space check above; complain if it does. */
	if ((size_t)len != optlen)
		printf("EEK!!\n");
	*buf += len;
	*blen -= len;
}
1951 
/*
 * Append an option together with its integer value, formatted as
 * "<opt>=<optval>", to the string being built.
 *
 * On success *buf is advanced past the text just written and *blen is
 * reduced by the same amount.  When the complete "name=value" text does
 * not fit in the remaining *blen bytes, nothing is appended: *buf and
 * *blen are left alone and any partial output is erased, so the string
 * never ends in a clipped option such as "name=" or a value with digits
 * missing.  A later, shorter option may still be appended at the same
 * position.  The nmp argument is not used here.
 */
static __inline void
nfscl_printoptval(struct nfsmount *nmp, int optval, char *opt, char **buf,
    size_t *blen)
{
	int len;

	/* Quick reject: there must at least be room for the name and '='. */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= len;
	} else {
		/*
		 * The value was truncated.  Drop the partial text rather
		 * than leaving a misleading option in the buffer; *blen is
		 * known to be non-zero here, so the store is in bounds.
		 */
		**buf = '\0';
	}
}
1969 
1970 /*
1971  * Load the option flags and values into the buffer.
1972  */
1973 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1974 {
1975 	char *buf;
1976 	size_t blen;
1977 
1978 	buf = buffer;
1979 	blen = buflen;
1980 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1981 	    &blen);
1982 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1983 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1984 		    &blen);
1985 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1986 		    &buf, &blen);
1987 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
1988 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
1989 	}
1990 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1991 	    &blen);
1992 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1993 	    "nfsv2", &buf, &blen);
1994 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1995 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1996 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1997 	    &buf, &blen);
1998 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1999 	    &buf, &blen);
2000 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2001 	    &blen);
2002 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2003 	    &blen);
2004 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2005 	    &blen);
2006 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2007 	    &blen);
2008 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2009 	    &blen);
2010 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2011 	    ",noncontigwr", &buf, &blen);
2012 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2013 	    0, ",lockd", &buf, &blen);
2014 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2015 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
2016 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2017 	    &buf, &blen);
2018 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2019 	    &buf, &blen);
2020 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2021 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2022 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2023 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2024 	    &buf, &blen);
2025 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2026 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2027 	    &buf, &blen);
2028 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2029 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2030 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2031 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2032 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2033 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2034 	    &blen);
2035 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2036 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2037 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2038 	    &blen);
2039 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2040 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2041 	    &blen);
2042 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2043 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2044 }
2045 
2046