xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision e17f5b1d)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1995
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Rick Macklem at The University of Guelph.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 
41 #include "opt_bootp.h"
42 #include "opt_nfsroot.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/bio.h>
48 #include <sys/buf.h>
49 #include <sys/clock.h>
50 #include <sys/jail.h>
51 #include <sys/limits.h>
52 #include <sys/lock.h>
53 #include <sys/malloc.h>
54 #include <sys/mbuf.h>
55 #include <sys/module.h>
56 #include <sys/mount.h>
57 #include <sys/proc.h>
58 #include <sys/socket.h>
59 #include <sys/socketvar.h>
60 #include <sys/sockio.h>
61 #include <sys/sysctl.h>
62 #include <sys/vnode.h>
63 #include <sys/signalvar.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_extern.h>
67 #include <vm/uma.h>
68 
69 #include <net/if.h>
70 #include <net/route.h>
71 #include <netinet/in.h>
72 
73 #include <fs/nfs/nfsport.h>
74 #include <fs/nfsclient/nfsnode.h>
75 #include <fs/nfsclient/nfsmount.h>
76 #include <fs/nfsclient/nfs.h>
77 #include <nfs/nfsdiskless.h>
78 
79 FEATURE(nfscl, "NFSv4 client");
80 
81 extern int nfscl_ticks;
82 extern struct timeval nfsboottime;
83 extern int nfsrv_useacl;
84 extern int nfscl_debuglevel;
85 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
86 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
87 extern struct mtx ncl_iod_mutex;
88 NFSCLSTATEMUTEX;
89 extern struct mtx nfsrv_dslock_mtx;
90 
91 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
92 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
93 
SYSCTL_DECL(_vfs_nfs);
/* Read/write integer knob registered under the _vfs_nfs node; default 1. */
static int nfs_ip_paranoia = 1;
SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
    &nfs_ip_paranoia, 0, "");
/*
 * Read/write integer knob initialised from NFS_TPRINTF_INITIAL_DELAY.
 * NOTE(review): the second SYSCTL_INT argument here is the delay constant,
 * whereas the other entries in this file pass OID_AUTO in that position;
 * confirm this is intentional before changing it, since it affects the
 * OID under which the knob is registered.
 */
static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
        downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
/*
 * How long between console messages "nfs server foo not responding".
 * NOTE(review): as above, NFS_TPRINTF_DELAY is passed where other entries
 * use OID_AUTO.
 */
static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
        downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
#ifdef NFS_DEBUG
/* Only compiled in when NFS_DEBUG is defined. */
int nfs_debug;
SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
    "Toggle debug flag");
#endif
110 
111 static int	nfs_mountroot(struct mount *);
112 static void	nfs_sec_name(char *, int *);
113 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
114 		    struct nfs_args *argp, const char *, struct ucred *,
115 		    struct thread *);
116 static int	mountnfs(struct nfs_args *, struct mount *,
117 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
118 		    u_char *, int, struct vnode **, struct ucred *,
119 		    struct thread *, int, int, int);
120 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
121 		    struct sockaddr_storage *, int *, off_t *,
122 		    struct timeval *);
123 static vfs_mount_t nfs_mount;
124 static vfs_cmount_t nfs_cmount;
125 static vfs_unmount_t nfs_unmount;
126 static vfs_root_t nfs_root;
127 static vfs_statfs_t nfs_statfs;
128 static vfs_sync_t nfs_sync;
129 static vfs_sysctl_t nfs_sysctl;
130 static vfs_purge_t nfs_purge;
131 
/*
 * NFS client VFS operations vector.
 *
 * Root vnode lookups are routed through vfs_cache_root(), with nfs_root()
 * installed as the .vfs_cachedroot handler.  The filesystem is registered
 * under the name "nfs" with the VFCF_NETWORK and VFCF_SBDRY flags.
 */
static struct vfsops nfs_vfsops = {
	.vfs_init =		ncl_init,
	.vfs_mount =		nfs_mount,
	.vfs_cmount =		nfs_cmount,
	.vfs_root =		vfs_cache_root,
	.vfs_cachedroot =	nfs_root,
	.vfs_statfs =		nfs_statfs,
	.vfs_sync =		nfs_sync,
	.vfs_uninit =		ncl_uninit,
	.vfs_unmount =		nfs_unmount,
	.vfs_sysctl =		nfs_sysctl,
	.vfs_purge =		nfs_purge,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
149 
/*
 * So that loader and kldload(2) can find us, wherever we are.
 * The "nfs" module is version 1 and declares dependencies on version 1
 * of the nfscommon, krpc and nfssvc modules.
 */
MODULE_VERSION(nfs, 1);
MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
MODULE_DEPEND(nfs, krpc, 1, 1, 1);
MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
155 
/*
 * These structures are now defined in sys/nfs/nfs_diskless.c so that they
 * can be shared by both NFS clients.  They are defined here as well, for
 * kernels built without NFS_ROOT, so that the symbols exist even though
 * they are not used in that case.
 *
 * Within this file nfs_diskless_valid is interpreted as follows:
 * 0 means no diskless information (nfs_mountroot() fails), 1 means the
 * old-format nfs_diskless must first be converted, and
 * nfs_convert_diskless() sets it to 3 once nfsv3_diskless is populated.
 */
#if !defined(NFS_ROOT)
struct nfs_diskless	nfs_diskless = { { { 0 } } };
struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
int			nfs_diskless_valid = 0;
#endif

/* Read-only view of nfs_diskless_valid. */
SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
    &nfs_diskless_valid, 0,
    "Has the diskless struct been filled correctly");

/* Read-only view of the nfsv3_diskless.root_hostnam string. */
SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
    nfsv3_diskless.root_hostnam, 0, "Path to nfs root");

/* Read-only opaque export of nfsv3_diskless.root_saddr. */
SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
    &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
    "%Ssockaddr_in", "Diskless root nfs address");
178 
179 
180 void		newnfsargs_ntoh(struct nfs_args *);
181 static int	nfs_mountdiskless(char *,
182 		    struct sockaddr_in *, struct nfs_args *,
183 		    struct thread *, struct vnode **, struct mount *);
184 static void	nfs_convert_diskless(void);
185 static void	nfs_convert_oargs(struct nfs_args *args,
186 		    struct onfs_args *oargs);
187 
188 int
189 newnfs_iosize(struct nfsmount *nmp)
190 {
191 	int iosize, maxio;
192 
193 	/* First, set the upper limit for iosize */
194 	if (nmp->nm_flag & NFSMNT_NFSV4) {
195 		maxio = NFS_MAXBSIZE;
196 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
197 		if (nmp->nm_sotype == SOCK_DGRAM)
198 			maxio = NFS_MAXDGRAMDATA;
199 		else
200 			maxio = NFS_MAXBSIZE;
201 	} else {
202 		maxio = NFS_V2MAXDATA;
203 	}
204 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
205 		nmp->nm_rsize = maxio;
206 	if (nmp->nm_rsize > NFS_MAXBSIZE)
207 		nmp->nm_rsize = NFS_MAXBSIZE;
208 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
209 		nmp->nm_readdirsize = maxio;
210 	if (nmp->nm_readdirsize > nmp->nm_rsize)
211 		nmp->nm_readdirsize = nmp->nm_rsize;
212 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
213 		nmp->nm_wsize = maxio;
214 	if (nmp->nm_wsize > NFS_MAXBSIZE)
215 		nmp->nm_wsize = NFS_MAXBSIZE;
216 
217 	/*
218 	 * Calculate the size used for io buffers.  Use the larger
219 	 * of the two sizes to minimise nfs requests but make sure
220 	 * that it is at least one VM page to avoid wasting buffer
221 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
222 	 * that is the buffer size used for directories.
223 	 */
224 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
225 	iosize = imax(iosize, PAGE_SIZE);
226 	iosize = imax(iosize, NFS_DIRBLKSIZ);
227 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
228 	return (iosize);
229 }
230 
231 static void
232 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
233 {
234 
235 	args->version = NFS_ARGSVERSION;
236 	args->addr = oargs->addr;
237 	args->addrlen = oargs->addrlen;
238 	args->sotype = oargs->sotype;
239 	args->proto = oargs->proto;
240 	args->fh = oargs->fh;
241 	args->fhsize = oargs->fhsize;
242 	args->flags = oargs->flags;
243 	args->wsize = oargs->wsize;
244 	args->rsize = oargs->rsize;
245 	args->readdirsize = oargs->readdirsize;
246 	args->timeo = oargs->timeo;
247 	args->retrans = oargs->retrans;
248 	args->readahead = oargs->readahead;
249 	args->hostname = oargs->hostname;
250 }
251 
252 static void
253 nfs_convert_diskless(void)
254 {
255 
256 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
257 		sizeof(struct ifaliasreq));
258 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
259 		sizeof(struct sockaddr_in));
260 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
261 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
262 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
263 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
264 	} else {
265 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
266 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
267 	}
268 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
269 		sizeof(struct sockaddr_in));
270 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
271 	nfsv3_diskless.root_time = nfs_diskless.root_time;
272 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
273 		MAXHOSTNAMELEN);
274 	nfs_diskless_valid = 3;
275 }
276 
277 /*
278  * nfs statfs call
279  */
280 static int
281 nfs_statfs(struct mount *mp, struct statfs *sbp)
282 {
283 	struct vnode *vp;
284 	struct thread *td;
285 	struct nfsmount *nmp = VFSTONFS(mp);
286 	struct nfsvattr nfsva;
287 	struct nfsfsinfo fs;
288 	struct nfsstatfs sb;
289 	int error = 0, attrflag, gotfsinfo = 0, ret;
290 	struct nfsnode *np;
291 
292 	td = curthread;
293 
294 	error = vfs_busy(mp, MBF_NOWAIT);
295 	if (error)
296 		return (error);
297 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
298 	if (error) {
299 		vfs_unbusy(mp);
300 		return (error);
301 	}
302 	vp = NFSTOV(np);
303 	mtx_lock(&nmp->nm_mtx);
304 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
305 		mtx_unlock(&nmp->nm_mtx);
306 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
307 		    &attrflag, NULL);
308 		if (!error)
309 			gotfsinfo = 1;
310 	} else
311 		mtx_unlock(&nmp->nm_mtx);
312 	if (!error)
313 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
314 		    &attrflag, NULL);
315 	if (error != 0)
316 		NFSCL_DEBUG(2, "statfs=%d\n", error);
317 	if (attrflag == 0) {
318 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
319 		    td->td_ucred, td, &nfsva, NULL, NULL);
320 		if (ret) {
321 			/*
322 			 * Just set default values to get things going.
323 			 */
324 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
325 			nfsva.na_vattr.va_type = VDIR;
326 			nfsva.na_vattr.va_mode = 0777;
327 			nfsva.na_vattr.va_nlink = 100;
328 			nfsva.na_vattr.va_uid = (uid_t)0;
329 			nfsva.na_vattr.va_gid = (gid_t)0;
330 			nfsva.na_vattr.va_fileid = 2;
331 			nfsva.na_vattr.va_gen = 1;
332 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
333 			nfsva.na_vattr.va_size = 512 * 1024;
334 		}
335 	}
336 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
337 	if (!error) {
338 	    mtx_lock(&nmp->nm_mtx);
339 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
340 		nfscl_loadfsinfo(nmp, &fs);
341 	    nfscl_loadsbinfo(nmp, &sb, sbp);
342 	    sbp->f_iosize = newnfs_iosize(nmp);
343 	    mtx_unlock(&nmp->nm_mtx);
344 	    if (sbp != &mp->mnt_stat) {
345 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
346 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
347 	    }
348 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
349 	} else if (NFS_ISV4(vp)) {
350 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
351 	}
352 	vput(vp);
353 	vfs_unbusy(mp);
354 	return (error);
355 }
356 
357 /*
358  * nfs version 3 fsinfo rpc call
359  */
360 int
361 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
362     struct thread *td)
363 {
364 	struct nfsfsinfo fs;
365 	struct nfsvattr nfsva;
366 	int error, attrflag;
367 
368 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
369 	if (!error) {
370 		if (attrflag)
371 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
372 			    1);
373 		mtx_lock(&nmp->nm_mtx);
374 		nfscl_loadfsinfo(nmp, &fs);
375 		mtx_unlock(&nmp->nm_mtx);
376 	}
377 	return (error);
378 }
379 
380 /*
381  * Mount a remote root fs via. nfs. This depends on the info in the
382  * nfs_diskless structure that has been filled in properly by some primary
383  * bootstrap.
384  * It goes something like this:
385  * - do enough of "ifconfig" by calling ifioctl() so that the system
386  *   can talk to the server
387  * - If nfs_diskless.mygateway is filled in, use that address as
388  *   a default gateway.
389  * - build the rootfs mount point and call mountnfs() to do the rest.
390  *
391  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
392  * structure, as well as other global NFS client variables here, as
393  * nfs_mountroot() will be called once in the boot before any other NFS
394  * client activity occurs.
395  */
396 static int
397 nfs_mountroot(struct mount *mp)
398 {
399 	struct thread *td = curthread;
400 	struct nfsv3_diskless *nd = &nfsv3_diskless;
401 	struct socket *so;
402 	struct vnode *vp;
403 	struct ifreq ir;
404 	int error;
405 	u_long l;
406 	char buf[128];
407 	char *cp;
408 
409 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
410 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
411 #elif defined(NFS_ROOT)
412 	nfs_setup_diskless();
413 #endif
414 
415 	if (nfs_diskless_valid == 0)
416 		return (-1);
417 	if (nfs_diskless_valid == 1)
418 		nfs_convert_diskless();
419 
420 	/*
421 	 * Do enough of ifconfig(8) so that the critical net interface can
422 	 * talk to the server.
423 	 */
424 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
425 	    td->td_ucred, td);
426 	if (error)
427 		panic("nfs_mountroot: socreate(%04x): %d",
428 			nd->myif.ifra_addr.sa_family, error);
429 
430 #if 0 /* XXX Bad idea */
431 	/*
432 	 * We might not have been told the right interface, so we pass
433 	 * over the first ten interfaces of the same kind, until we get
434 	 * one of them configured.
435 	 */
436 
437 	for (i = strlen(nd->myif.ifra_name) - 1;
438 		nd->myif.ifra_name[i] >= '0' &&
439 		nd->myif.ifra_name[i] <= '9';
440 		nd->myif.ifra_name[i] ++) {
441 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
442 		if(!error)
443 			break;
444 	}
445 #endif
446 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
447 	if (error)
448 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
449 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
450 		ir.ifr_mtu = strtol(cp, NULL, 10);
451 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
452 		freeenv(cp);
453 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
454 		if (error)
455 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
456 	}
457 	soclose(so);
458 
459 	/*
460 	 * If the gateway field is filled in, set it as the default route.
461 	 * Note that pxeboot will set a default route of 0 if the route
462 	 * is not set by the DHCP server.  Check also for a value of 0
463 	 * to avoid panicking inappropriately in that situation.
464 	 */
465 	if (nd->mygateway.sin_len != 0 &&
466 	    nd->mygateway.sin_addr.s_addr != 0) {
467 		struct sockaddr_in mask, sin;
468 		struct epoch_tracker et;
469 
470 		bzero((caddr_t)&mask, sizeof(mask));
471 		sin = mask;
472 		sin.sin_family = AF_INET;
473 		sin.sin_len = sizeof(sin);
474                 /* XXX MRT use table 0 for this sort of thing */
475 		NET_EPOCH_ENTER(et);
476 		CURVNET_SET(TD_TO_VNET(td));
477 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
478 		    (struct sockaddr *)&nd->mygateway,
479 		    (struct sockaddr *)&mask,
480 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
481 		CURVNET_RESTORE();
482 		NET_EPOCH_EXIT(et);
483 		if (error)
484 			panic("nfs_mountroot: RTM_ADD: %d", error);
485 	}
486 
487 	/*
488 	 * Create the rootfs mount point.
489 	 */
490 	nd->root_args.fh = nd->root_fh;
491 	nd->root_args.fhsize = nd->root_fhsize;
492 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
493 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
494 		(l >> 24) & 0xff, (l >> 16) & 0xff,
495 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
496 	printf("NFS ROOT: %s\n", buf);
497 	nd->root_args.hostname = buf;
498 	if ((error = nfs_mountdiskless(buf,
499 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
500 		return (error);
501 	}
502 
503 	/*
504 	 * This is not really an nfs issue, but it is much easier to
505 	 * set hostname here and then let the "/etc/rc.xxx" files
506 	 * mount the right /var based upon its preset value.
507 	 */
508 	mtx_lock(&prison0.pr_mtx);
509 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
510 	    sizeof(prison0.pr_hostname));
511 	mtx_unlock(&prison0.pr_mtx);
512 	inittodr(ntohl(nd->root_time));
513 	return (0);
514 }
515 
516 /*
517  * Internal version of mount system call for diskless setup.
518  */
519 static int
520 nfs_mountdiskless(char *path,
521     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
522     struct vnode **vpp, struct mount *mp)
523 {
524 	struct sockaddr *nam;
525 	int dirlen, error;
526 	char *dirpath;
527 
528 	/*
529 	 * Find the directory path in "path", which also has the server's
530 	 * name/ip address in it.
531 	 */
532 	dirpath = strchr(path, ':');
533 	if (dirpath != NULL)
534 		dirlen = strlen(++dirpath);
535 	else
536 		dirlen = 0;
537 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
538 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
539 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
540 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
541 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
542 		return (error);
543 	}
544 	return (0);
545 }
546 
547 static void
548 nfs_sec_name(char *sec, int *flagsp)
549 {
550 	if (!strcmp(sec, "krb5"))
551 		*flagsp |= NFSMNT_KERB;
552 	else if (!strcmp(sec, "krb5i"))
553 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
554 	else if (!strcmp(sec, "krb5p"))
555 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
556 }
557 
/*
 * Apply the settings in *argp (and the "ro" mount option) to the NFS
 * mount *nmp.
 *
 * mp       - mount point; its MNT_RDONLY flag may be set or cleared.
 * nmp      - NFS mount structure being configured.
 * argp     - requested arguments; some flag bits in argp->flags are
 *            cleared here when they do not apply.
 * hostname - if non-NULL, copied into nmp->nm_hostname, truncated at the
 *            first ':'.
 * cred, td - used only when the socket has to be reconnected.
 *
 * If the mount already has a client handle (nm_client != NULL) and the
 * transport-related settings changed, the existing connection is torn
 * down and, for datagram sockets, re-established.
 */
static void
nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
    const char *hostname, struct ucred *cred, struct thread *td)
{
	int adjsock;
	char *p;

	/*
	 * Set read-only flag if requested; otherwise, clear it if this is
	 * an update.  If this is not an update, then either the read-only
	 * flag is already clear, or this is a root mount and it was set
	 * intentionally at some previous point.
	 */
	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_RDONLY;
		MNT_IUNLOCK(mp);
	} else if (mp->mnt_flag & MNT_UPDATE) {
		MNT_ILOCK(mp);
		mp->mnt_flag &= ~MNT_RDONLY;
		MNT_IUNLOCK(mp);
	}

	/*
	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
	 * no sense in that context.  Also, set up appropriate retransmit
	 * and soft timeout behavior.
	 */
	if (argp->sotype == SOCK_STREAM) {
		nmp->nm_flag &= ~NFSMNT_NOCONN;
		nmp->nm_timeo = NFS_MAXTIMEO;
		if ((argp->flags & NFSMNT_NFSV4) != 0)
			nmp->nm_retry = INT_MAX;
		else
			nmp->nm_retry = NFS_RETRANS_TCP;
	}

	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		argp->flags &= ~NFSMNT_RDIRPLUS;
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
	}

	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
	if (nmp->nm_minorvers == 0) {
		argp->flags &= ~NFSMNT_ONEOPENOWN;
		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
	}

	/* Re-bind if rsrvd port requested and wasn't on one */
	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
		  && (argp->flags & NFSMNT_RESVPORT);
	/* Also re-bind if we're switching to/from a connected UDP socket */
	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
		    (argp->flags & NFSMNT_NOCONN));

	/*
	 * Update flags atomically.  Don't change the lock bits.
	 * The requested flags are OR-ed into the existing ones, so this
	 * statement only ever sets bits; it never clears any.
	 */
	nmp->nm_flag = argp->flags | nmp->nm_flag;

	/* Convert the requested timeout to NFS_HZ ticks and clamp it. */
	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
		if (nmp->nm_timeo < NFS_MINTIMEO)
			nmp->nm_timeo = NFS_MINTIMEO;
		else if (nmp->nm_timeo > NFS_MAXTIMEO)
			nmp->nm_timeo = NFS_MAXTIMEO;
	}

	/* A retransmit count of 1 or less is ignored. */
	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
		nmp->nm_retry = argp->retrans;
		if (nmp->nm_retry > NFS_MAXREXMIT)
			nmp->nm_retry = NFS_MAXREXMIT;
	}

	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
		nmp->nm_wsize = argp->wsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_wsize > NFS_FABLKSIZE)
			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
		else
			nmp->nm_wsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
		nmp->nm_rsize = argp->rsize;
		/*
		 * Clip at the power of 2 below the size. There is an
		 * issue (not isolated) that causes intermittent page
		 * faults if this is not done.
		 */
		if (nmp->nm_rsize > NFS_FABLKSIZE)
			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
		else
			nmp->nm_rsize = NFS_FABLKSIZE;
	}

	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
		nmp->nm_readdirsize = argp->readdirsize;
	}

	/*
	 * Attribute cache timeouts: each one takes the requested value when
	 * its flag is set and the value is non-negative, and the compiled-in
	 * default otherwise.  The minimums are then capped by the maximums.
	 */
	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
		nmp->nm_acregmin = argp->acregmin;
	else
		nmp->nm_acregmin = NFS_MINATTRTIMO;
	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
		nmp->nm_acregmax = argp->acregmax;
	else
		nmp->nm_acregmax = NFS_MAXATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
		nmp->nm_acdirmin = argp->acdirmin;
	else
		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
		nmp->nm_acdirmax = argp->acdirmax;
	else
		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
		nmp->nm_acdirmin = nmp->nm_acdirmax;
	if (nmp->nm_acregmin > nmp->nm_acregmax)
		nmp->nm_acregmin = nmp->nm_acregmax;

	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
		if (argp->readahead <= NFS_MAXRAHEAD)
			nmp->nm_readahead = argp->readahead;
		else
			nmp->nm_readahead = NFS_MAXRAHEAD;
	}
	/* The write commit size is never allowed below the write size. */
	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
		if (argp->wcommitsize < nmp->nm_wsize)
			nmp->nm_wcommitsize = nmp->nm_wsize;
		else
			nmp->nm_wcommitsize = argp->wcommitsize;
	}

	/* A change of socket type or protocol also requires a reconnect. */
	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
		    (nmp->nm_soproto != argp->proto));

	if (nmp->nm_client != NULL && adjsock) {
		int haslock = 0, error = 0;

		/* For stream sockets the send lock is taken around the disconnect. */
		if (nmp->nm_sotype == SOCK_STREAM) {
			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
			if (!error)
				haslock = 1;
		}
		/*
		 * If the send lock could not be obtained, the socket type
		 * and protocol are left unchanged.
		 */
		if (!error) {
		    newnfs_disconnect(&nmp->nm_sockreq);
		    if (haslock)
			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
		    nmp->nm_sotype = argp->sotype;
		    nmp->nm_soproto = argp->proto;
		    /* Datagram sockets are reconnected here, retrying until it works. */
		    if (nmp->nm_sotype == SOCK_DGRAM)
			while (newnfs_connect(nmp, &nmp->nm_sockreq,
			    cred, td, 0)) {
				printf("newnfs_args: retrying connect\n");
				(void) nfs_catnap(PSOCK, 0, "nfscon");
			}
		}
	} else {
		nmp->nm_sotype = argp->sotype;
		nmp->nm_soproto = argp->proto;
	}

	/* Keep only the host part of a "host:path" style name. */
	if (hostname != NULL) {
		strlcpy(nmp->nm_hostname, hostname,
		    sizeof(nmp->nm_hostname));
		p = strchr(nmp->nm_hostname, ':');
		if (p != NULL)
			*p = '\0';
	}
}
732 
/*
 * NULL-terminated list of mount option names; nfs_mount() passes it to
 * vfs_filteropt() and fails the mount with EINVAL if that call reports
 * a problem with the supplied options.
 */
static const char *nfs_opts[] = { "from", "nfs_args",
    "noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union",
    "noclusterr", "noclusterw", "multilabel", "acls", "force", "update",
    "async", "noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
    "readdirsize", "soft", "hard", "mntudp", "tcp", "udp", "wsize", "rsize",
    "retrans", "actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
    "resvport", "readahead", "hostname", "timeo", "timeout", "addr", "fh",
    "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath",
    "minorversion", "nametimeo", "negnametimeo", "nocto", "noncontigwr",
    "pnfs", "wcommitsize", "oneopenown",
    NULL };
744 
745 /*
746  * Parse the "from" mountarg, passed by the generic mount(8) program
747  * or the mountroot code.  This is used when rerooting into NFS.
748  *
749  * Note that the "hostname" is actually a "hostname:/share/path" string.
750  */
751 static int
752 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
753     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
754 {
755 	char *nam, *delimp, *hostp, *spec;
756 	int error, have_bracket = 0, offset, rv, speclen;
757 	struct sockaddr_in *sin;
758 	size_t len;
759 
760 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
761 	if (error != 0)
762 		return (error);
763 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
764 
765 	/*
766 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
767 	 */
768 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
769 	    *(delimp + 1) == ':') {
770 		hostp = spec + 1;
771 		spec = delimp + 2;
772 		have_bracket = 1;
773 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
774 		hostp = spec;
775 		spec = delimp + 1;
776 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
777 		printf("%s: path@server syntax is deprecated, "
778 		    "use server:path\n", __func__);
779 		hostp = delimp + 1;
780 	} else {
781 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
782 		free(nam, M_TEMP);
783 		return (EINVAL);
784 	}
785 	*delimp = '\0';
786 
787 	/*
788 	 * If there has been a trailing slash at mounttime it seems
789 	 * that some mountd implementations fail to remove the mount
790 	 * entries from their mountlist while unmounting.
791 	 */
792 	for (speclen = strlen(spec);
793 	    speclen > 1 && spec[speclen - 1] == '/';
794 	    speclen--)
795 		spec[speclen - 1] = '\0';
796 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
797 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
798 		free(nam, M_TEMP);
799 		return (EINVAL);
800 	}
801 	/* Make both '@' and ':' notations equal */
802 	if (*hostp != '\0') {
803 		len = strlen(hostp);
804 		offset = 0;
805 		if (have_bracket)
806 			nam[offset++] = '[';
807 		memmove(nam + offset, hostp, len);
808 		if (have_bracket)
809 			nam[len + offset++] = ']';
810 		nam[len + offset++] = ':';
811 		memmove(nam + len + offset, spec, speclen);
812 		nam[len + speclen + offset] = '\0';
813 	} else
814 		nam[0] = '\0';
815 
816 	/*
817 	 * XXX: IPv6
818 	 */
819 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
820 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
821 	if (rv != 1) {
822 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
823 		    __func__, hostp, rv);
824 		free(nam, M_TEMP);
825 		free(sin, M_SONAME);
826 		return (EINVAL);
827 	}
828 
829 	sin->sin_len = sizeof(*sin);
830 	sin->sin_family = AF_INET;
831 	/*
832 	 * XXX: hardcoded port number.
833 	 */
834 	sin->sin_port = htons(2049);
835 
836 	*hostnamep = strdup(nam, M_NEWNFSMNT);
837 	*sinp = sin;
838 	strlcpy(dirpath, spec, dirpathsize);
839 	*dirlenp = strlen(dirpath);
840 
841 	free(nam, M_TEMP);
842 	return (0);
843 }
844 
845 /*
846  * VFS Operations.
847  *
848  * mount system call
849  * It seems a bit dumb to copyinstr() the host and path here and then
850  * bcopy() them in mountnfs(), but I wanted to detect errors before
851  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
852  * an error after that means that I have to release the mbuf.
853  */
854 /* ARGSUSED */
855 static int
856 nfs_mount(struct mount *mp)
857 {
858 	struct nfs_args args = {
859 	    .version = NFS_ARGSVERSION,
860 	    .addr = NULL,
861 	    .addrlen = sizeof (struct sockaddr_in),
862 	    .sotype = SOCK_STREAM,
863 	    .proto = 0,
864 	    .fh = NULL,
865 	    .fhsize = 0,
866 	    .flags = NFSMNT_RESVPORT,
867 	    .wsize = NFS_WSIZE,
868 	    .rsize = NFS_RSIZE,
869 	    .readdirsize = NFS_READDIRSIZE,
870 	    .timeo = 10,
871 	    .retrans = NFS_RETRANS,
872 	    .readahead = NFS_DEFRAHEAD,
873 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
874 	    .hostname = NULL,
875 	    .acregmin = NFS_MINATTRTIMO,
876 	    .acregmax = NFS_MAXATTRTIMO,
877 	    .acdirmin = NFS_MINDIRATTRTIMO,
878 	    .acdirmax = NFS_MAXDIRATTRTIMO,
879 	};
880 	int error = 0, ret, len;
881 	struct sockaddr *nam = NULL;
882 	struct vnode *vp;
883 	struct thread *td;
884 	char *hst;
885 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
886 	char *cp, *opt, *name, *secname;
887 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
888 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
889 	int minvers = 0;
890 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
891 	    krbnamelen, srvkrbnamelen;
892 	size_t hstlen;
893 
894 	has_nfs_args_opt = 0;
895 	has_nfs_from_opt = 0;
896 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
897 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
898 		error = EINVAL;
899 		goto out;
900 	}
901 
902 	td = curthread;
903 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
904 	    nfs_diskless_valid != 0) {
905 		error = nfs_mountroot(mp);
906 		goto out;
907 	}
908 
909 	nfscl_init();
910 
911 	/*
912 	 * The old mount_nfs program passed the struct nfs_args
913 	 * from userspace to kernel.  The new mount_nfs program
914 	 * passes string options via nmount() from userspace to kernel
915 	 * and we populate the struct nfs_args in the kernel.
916 	 */
917 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
918 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
919 		    sizeof(args));
920 		if (error != 0)
921 			goto out;
922 
923 		if (args.version != NFS_ARGSVERSION) {
924 			error = EPROGMISMATCH;
925 			goto out;
926 		}
927 		has_nfs_args_opt = 1;
928 	}
929 
930 	/* Handle the new style options. */
931 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
932 		args.acdirmin = args.acdirmax =
933 		    args.acregmin = args.acregmax = 0;
934 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
935 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
936 	}
937 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
938 		args.flags |= NFSMNT_NOCONN;
939 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
940 		args.flags &= ~NFSMNT_NOCONN;
941 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
942 		args.flags |= NFSMNT_NOLOCKD;
943 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
944 		args.flags &= ~NFSMNT_NOLOCKD;
945 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
946 		args.flags |= NFSMNT_INT;
947 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
948 		args.flags |= NFSMNT_RDIRPLUS;
949 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
950 		args.flags |= NFSMNT_RESVPORT;
951 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
952 		args.flags &= ~NFSMNT_RESVPORT;
953 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
954 		args.flags |= NFSMNT_SOFT;
955 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
956 		args.flags &= ~NFSMNT_SOFT;
957 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
958 		args.sotype = SOCK_DGRAM;
959 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
960 		args.sotype = SOCK_DGRAM;
961 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
962 		args.sotype = SOCK_STREAM;
963 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
964 		args.flags |= NFSMNT_NFSV3;
965 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
966 		args.flags |= NFSMNT_NFSV4;
967 		args.sotype = SOCK_STREAM;
968 	}
969 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
970 		args.flags |= NFSMNT_ALLGSSNAME;
971 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
972 		args.flags |= NFSMNT_NOCTO;
973 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
974 		args.flags |= NFSMNT_NONCONTIGWR;
975 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
976 		args.flags |= NFSMNT_PNFS;
977 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
978 		args.flags |= NFSMNT_ONEOPENOWN;
979 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
980 		if (opt == NULL) {
981 			vfs_mount_error(mp, "illegal readdirsize");
982 			error = EINVAL;
983 			goto out;
984 		}
985 		ret = sscanf(opt, "%d", &args.readdirsize);
986 		if (ret != 1 || args.readdirsize <= 0) {
987 			vfs_mount_error(mp, "illegal readdirsize: %s",
988 			    opt);
989 			error = EINVAL;
990 			goto out;
991 		}
992 		args.flags |= NFSMNT_READDIRSIZE;
993 	}
994 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
995 		if (opt == NULL) {
996 			vfs_mount_error(mp, "illegal readahead");
997 			error = EINVAL;
998 			goto out;
999 		}
1000 		ret = sscanf(opt, "%d", &args.readahead);
1001 		if (ret != 1 || args.readahead <= 0) {
1002 			vfs_mount_error(mp, "illegal readahead: %s",
1003 			    opt);
1004 			error = EINVAL;
1005 			goto out;
1006 		}
1007 		args.flags |= NFSMNT_READAHEAD;
1008 	}
1009 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1010 		if (opt == NULL) {
1011 			vfs_mount_error(mp, "illegal wsize");
1012 			error = EINVAL;
1013 			goto out;
1014 		}
1015 		ret = sscanf(opt, "%d", &args.wsize);
1016 		if (ret != 1 || args.wsize <= 0) {
1017 			vfs_mount_error(mp, "illegal wsize: %s",
1018 			    opt);
1019 			error = EINVAL;
1020 			goto out;
1021 		}
1022 		args.flags |= NFSMNT_WSIZE;
1023 	}
1024 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1025 		if (opt == NULL) {
1026 			vfs_mount_error(mp, "illegal rsize");
1027 			error = EINVAL;
1028 			goto out;
1029 		}
1030 		ret = sscanf(opt, "%d", &args.rsize);
1031 		if (ret != 1 || args.rsize <= 0) {
1032 			vfs_mount_error(mp, "illegal wsize: %s",
1033 			    opt);
1034 			error = EINVAL;
1035 			goto out;
1036 		}
1037 		args.flags |= NFSMNT_RSIZE;
1038 	}
1039 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1040 		if (opt == NULL) {
1041 			vfs_mount_error(mp, "illegal retrans");
1042 			error = EINVAL;
1043 			goto out;
1044 		}
1045 		ret = sscanf(opt, "%d", &args.retrans);
1046 		if (ret != 1 || args.retrans <= 0) {
1047 			vfs_mount_error(mp, "illegal retrans: %s",
1048 			    opt);
1049 			error = EINVAL;
1050 			goto out;
1051 		}
1052 		args.flags |= NFSMNT_RETRANS;
1053 	}
1054 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1055 		ret = sscanf(opt, "%d", &args.acregmin);
1056 		if (ret != 1 || args.acregmin < 0) {
1057 			vfs_mount_error(mp, "illegal actimeo: %s",
1058 			    opt);
1059 			error = EINVAL;
1060 			goto out;
1061 		}
1062 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1063 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1064 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1065 	}
1066 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1067 		ret = sscanf(opt, "%d", &args.acregmin);
1068 		if (ret != 1 || args.acregmin < 0) {
1069 			vfs_mount_error(mp, "illegal acregmin: %s",
1070 			    opt);
1071 			error = EINVAL;
1072 			goto out;
1073 		}
1074 		args.flags |= NFSMNT_ACREGMIN;
1075 	}
1076 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1077 		ret = sscanf(opt, "%d", &args.acregmax);
1078 		if (ret != 1 || args.acregmax < 0) {
1079 			vfs_mount_error(mp, "illegal acregmax: %s",
1080 			    opt);
1081 			error = EINVAL;
1082 			goto out;
1083 		}
1084 		args.flags |= NFSMNT_ACREGMAX;
1085 	}
1086 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1087 		ret = sscanf(opt, "%d", &args.acdirmin);
1088 		if (ret != 1 || args.acdirmin < 0) {
1089 			vfs_mount_error(mp, "illegal acdirmin: %s",
1090 			    opt);
1091 			error = EINVAL;
1092 			goto out;
1093 		}
1094 		args.flags |= NFSMNT_ACDIRMIN;
1095 	}
1096 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1097 		ret = sscanf(opt, "%d", &args.acdirmax);
1098 		if (ret != 1 || args.acdirmax < 0) {
1099 			vfs_mount_error(mp, "illegal acdirmax: %s",
1100 			    opt);
1101 			error = EINVAL;
1102 			goto out;
1103 		}
1104 		args.flags |= NFSMNT_ACDIRMAX;
1105 	}
1106 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1107 		ret = sscanf(opt, "%d", &args.wcommitsize);
1108 		if (ret != 1 || args.wcommitsize < 0) {
1109 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1110 			error = EINVAL;
1111 			goto out;
1112 		}
1113 		args.flags |= NFSMNT_WCOMMITSIZE;
1114 	}
1115 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1116 		ret = sscanf(opt, "%d", &args.timeo);
1117 		if (ret != 1 || args.timeo <= 0) {
1118 			vfs_mount_error(mp, "illegal timeo: %s",
1119 			    opt);
1120 			error = EINVAL;
1121 			goto out;
1122 		}
1123 		args.flags |= NFSMNT_TIMEO;
1124 	}
1125 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1126 		ret = sscanf(opt, "%d", &args.timeo);
1127 		if (ret != 1 || args.timeo <= 0) {
1128 			vfs_mount_error(mp, "illegal timeout: %s",
1129 			    opt);
1130 			error = EINVAL;
1131 			goto out;
1132 		}
1133 		args.flags |= NFSMNT_TIMEO;
1134 	}
1135 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1136 		ret = sscanf(opt, "%d", &nametimeo);
1137 		if (ret != 1 || nametimeo < 0) {
1138 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1139 			error = EINVAL;
1140 			goto out;
1141 		}
1142 	}
1143 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1144 	    == 0) {
1145 		ret = sscanf(opt, "%d", &negnametimeo);
1146 		if (ret != 1 || negnametimeo < 0) {
1147 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1148 			    opt);
1149 			error = EINVAL;
1150 			goto out;
1151 		}
1152 	}
1153 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1154 	    0) {
1155 		ret = sscanf(opt, "%d", &minvers);
1156 		if (ret != 1 || minvers < 0 || minvers > 2 ||
1157 		    (args.flags & NFSMNT_NFSV4) == 0) {
1158 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1159 			error = EINVAL;
1160 			goto out;
1161 		}
1162 	}
1163 	if (vfs_getopt(mp->mnt_optnew, "sec",
1164 		(void **) &secname, NULL) == 0)
1165 		nfs_sec_name(secname, &args.flags);
1166 
1167 	if (mp->mnt_flag & MNT_UPDATE) {
1168 		struct nfsmount *nmp = VFSTONFS(mp);
1169 
1170 		if (nmp == NULL) {
1171 			error = EIO;
1172 			goto out;
1173 		}
1174 
1175 		/*
1176 		 * If a change from TCP->UDP is done and there are thread(s)
1177 		 * that have I/O RPC(s) in progress with a transfer size
1178 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1179 		 * hung, retrying the RPC(s) forever. Usually these threads
1180 		 * will be seen doing an uninterruptible sleep on wait channel
1181 		 * "nfsreq".
1182 		 */
1183 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1184 			tprintf(td->td_proc, LOG_WARNING,
1185 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1186 
1187 		/*
1188 		 * When doing an update, we can't change version,
1189 		 * security, switch lockd strategies, change cookie
1190 		 * translation or switch oneopenown.
1191 		 */
1192 		args.flags = (args.flags &
1193 		    ~(NFSMNT_NFSV3 |
1194 		      NFSMNT_NFSV4 |
1195 		      NFSMNT_KERB |
1196 		      NFSMNT_INTEGRITY |
1197 		      NFSMNT_PRIVACY |
1198 		      NFSMNT_ONEOPENOWN |
1199 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1200 		    (nmp->nm_flag &
1201 			(NFSMNT_NFSV3 |
1202 			 NFSMNT_NFSV4 |
1203 			 NFSMNT_KERB |
1204 			 NFSMNT_INTEGRITY |
1205 			 NFSMNT_PRIVACY |
1206 			 NFSMNT_ONEOPENOWN |
1207 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1208 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1209 		goto out;
1210 	}
1211 
1212 	/*
1213 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1214 	 * or no-connection mode for those protocols that support
1215 	 * no-connection mode (the flag will be cleared later for protocols
1216 	 * that do not support no-connection mode).  This will allow a client
1217 	 * to receive replies from a different IP then the request was
1218 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1219 	 * not 0.
1220 	 */
1221 	if (nfs_ip_paranoia == 0)
1222 		args.flags |= NFSMNT_NOCONN;
1223 
1224 	if (has_nfs_args_opt != 0) {
1225 		/*
1226 		 * In the 'nfs_args' case, the pointers in the args
1227 		 * structure are in userland - we copy them in here.
1228 		 */
1229 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1230 			vfs_mount_error(mp, "Bad file handle");
1231 			error = EINVAL;
1232 			goto out;
1233 		}
1234 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1235 		    args.fhsize);
1236 		if (error != 0)
1237 			goto out;
1238 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1239 		if (error != 0)
1240 			goto out;
1241 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1242 		args.hostname = hst;
1243 		/* getsockaddr() call must be after above copyin() calls */
1244 		error = getsockaddr(&nam, args.addr, args.addrlen);
1245 		if (error != 0)
1246 			goto out;
1247 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1248 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1249 	    sizeof(dirpath), &dirlen) == 0) {
1250 		has_nfs_from_opt = 1;
1251 		bcopy(args.hostname, hst, MNAMELEN);
1252 		hst[MNAMELEN - 1] = '\0';
1253 
1254 		/*
1255 		 * This only works with NFSv4 for now.
1256 		 */
1257 		args.fhsize = 0;
1258 		args.flags |= NFSMNT_NFSV4;
1259 		args.sotype = SOCK_STREAM;
1260 	} else {
1261 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1262 		    &args.fhsize) == 0) {
1263 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1264 				vfs_mount_error(mp, "Bad file handle");
1265 				error = EINVAL;
1266 				goto out;
1267 			}
1268 			bcopy(args.fh, nfh, args.fhsize);
1269 		} else {
1270 			args.fhsize = 0;
1271 		}
1272 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1273 		    (void **)&args.hostname, &len);
1274 		if (args.hostname == NULL) {
1275 			vfs_mount_error(mp, "Invalid hostname");
1276 			error = EINVAL;
1277 			goto out;
1278 		}
1279 		if (len >= MNAMELEN) {
1280 			vfs_mount_error(mp, "Hostname too long");
1281 			error = EINVAL;
1282 			goto out;
1283 		}
1284 		bcopy(args.hostname, hst, len);
1285 		hst[len] = '\0';
1286 	}
1287 
1288 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1289 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1290 	else {
1291 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1292 		cp = strchr(srvkrbname, ':');
1293 		if (cp != NULL)
1294 			*cp = '\0';
1295 	}
1296 	srvkrbnamelen = strlen(srvkrbname);
1297 
1298 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1299 		strlcpy(krbname, name, sizeof (krbname));
1300 	else
1301 		krbname[0] = '\0';
1302 	krbnamelen = strlen(krbname);
1303 
1304 	if (has_nfs_from_opt == 0) {
1305 		if (vfs_getopt(mp->mnt_optnew,
1306 		    "dirpath", (void **)&name, NULL) == 0)
1307 			strlcpy(dirpath, name, sizeof (dirpath));
1308 		else
1309 			dirpath[0] = '\0';
1310 		dirlen = strlen(dirpath);
1311 	}
1312 
1313 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1314 		if (vfs_getopt(mp->mnt_optnew, "addr",
1315 		    (void **)&args.addr, &args.addrlen) == 0) {
1316 			if (args.addrlen > SOCK_MAXADDRLEN) {
1317 				error = ENAMETOOLONG;
1318 				goto out;
1319 			}
1320 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1321 			bcopy(args.addr, nam, args.addrlen);
1322 			nam->sa_len = args.addrlen;
1323 		} else {
1324 			vfs_mount_error(mp, "No server address");
1325 			error = EINVAL;
1326 			goto out;
1327 		}
1328 	}
1329 
1330 	args.fh = nfh;
1331 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1332 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1333 	    nametimeo, negnametimeo, minvers);
1334 out:
1335 	if (!error) {
1336 		MNT_ILOCK(mp);
1337 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1338 		    MNTK_USES_BCACHE;
1339 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1340 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1341 		MNT_IUNLOCK(mp);
1342 	}
1343 	free(hst, M_TEMP);
1344 	return (error);
1345 }
1346 
1347 
1348 /*
1349  * VFS Operations.
1350  *
1351  * mount system call
1352  * It seems a bit dumb to copyinstr() the host and path here and then
1353  * bcopy() them in mountnfs(), but I wanted to detect errors before
1354  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1355  * an error after that means that I have to release the mbuf.
1356  */
1357 /* ARGSUSED */
1358 static int
1359 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1360 {
1361 	int error;
1362 	struct nfs_args args;
1363 
1364 	error = copyin(data, &args, sizeof (struct nfs_args));
1365 	if (error)
1366 		return error;
1367 
1368 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1369 
1370 	error = kernel_mount(ma, flags);
1371 	return (error);
1372 }
1373 
/*
 * Common code for mount and mountroot
 *
 * Allocates and initializes a new struct nfsmount for mp, connects to the
 * server and obtains the root vnode of the mount.
 *
 * argp		- mount arguments; flags, fhsize, fh, sotype and proto are
 *		  read here and the structure is passed to nfs_decode_args().
 * mp		- mount point being set up; mnt_data is pointed at the new
 *		  nfsmount.
 * nam		- server address.  Stored in nm_nam; freed with M_SONAME on
 *		  the MNT_UPDATE and error paths.
 * hst		- name copied (MNAMELEN bytes) into f_mntfromname.
 * krbname, dirpath, srvkrbname (and their lengths)
 *		- names copied into nm_name; the allocation is sized to
 *		  hold them.
 * vpp		- on success, set to the root vnode, unlocked but referenced.
 * cred, td	- credentials and thread used for the connect and the RPCs.
 * nametimeo, negnametimeo
 *		- stored as nm_nametimeo and nm_negnametimeo.
 * minvers	- NFSv4 minor version; only used when NFSMNT_NFSV4 is set.
 *
 * Returns 0 on success, otherwise an errno value.  EIO is returned when
 * no root file handle is available once setup has completed.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Counter handed out as nm_clval; seeded from nfsboottime below. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		/* Updates are not processed here: release nam and succeed. */
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		free(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * The nfsmount is allocated zeroed, with extra room for the
		 * three names.  The "+ 2" presumably covers NUL separators,
		 * with nm_name itself supplying the remaining byte -- TODO
		 * confirm against the struct nfsmount definition.
		 */
		nmp = malloc(sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		TAILQ_INIT(&nmp->nm_sess);
		/* First mount seeds clval; each mount takes the next value. */
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* Take a reference on cred; dropped by crfree() at "bad". */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;

	/* This is empirical approximation of sqrt(hibufspace) * 256. */
	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
		nmp->nm_wcommitsize *= 2;
	nmp->nm_wcommitsize *= 256;

	/* The minor version only applies to NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/* Neither V3 nor V4 requested: use the default transfer sizes. */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are clamped to zero. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to three attempts, napping after each failure. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error)
			goto bad;
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == UFS_ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		/* A failure here is not fatal; "ret" is checked again below. */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			/* clp was obtained by nfscl_getcl() above. */
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping zero on wrap-around. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp);
		vfs_cache_root_set(mp, *vpp);
		return (0);
	}
	/* No root file handle could be established. */
	error = EIO;

bad:
	/*
	 * Failure: undo everything set up above, then free the nfsmount
	 * and the server address.
	 */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	/*
	 * Free every session entry; only entries other than the first have
	 * their socket disconnected here.
	 */
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
		    dsp->nfsclds_sockp != NULL)
			newnfs_disconnect(dsp->nfsclds_sockp);
		nfscl_freenfsclds(dsp);
	}
	free(nmp, M_NEWNFSMNT);
	free(nam, M_SONAME);
	return (error);
}
1660 
/*
 * unmount system call
 *
 * Flushes the vnodes of the mount, disconnects from the server and frees
 * the nfsmount.  With MNT_FORCE set in mntflags, outstanding requests are
 * cancelled first and the vnode flush is retried a bounded number of
 * times.  Returns 0 on success or an errno value, in which case the
 * nfsmount has not been freed.
 */
static int
nfs_unmount(struct mount *mp, int mntflags)
{
	struct thread *td;
	struct nfsmount *nmp;
	int error, flags = 0, i, trycnt = 0;
	struct nfsclds *dsp, *tdsp;

	td = curthread;

	/* A forced unmount makes vflush() use FORCECLOSE. */
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	nmp = VFSTONFS(mp);
	error = 0;
	/*
	 * Goes something like this..
	 * - Call vflush() to clear out vnodes for this filesystem
	 * - Close the socket
	 * - Free up the data structures
	 */
	/* In the forced case, cancel any outstanding requests. */
	if (mntflags & MNT_FORCE) {
		/*
		 * Refuse with ENXIO when nfsv4_findmirror() finds an entry
		 * for this mount (presumably the mount is in use as a pNFS
		 * data server mirror -- TODO confirm).
		 */
		NFSDDSLOCK();
		if (nfsv4_findmirror(nmp) != NULL)
			error = ENXIO;
		NFSDDSUNLOCK();
		if (error)
			goto out;
		error = newnfs_nmcancelreqs(nmp);
		if (error)
			goto out;
		/* For a forced close, get rid of the renew thread now */
		nfscl_umount(nmp, td);
	}
	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
	/*
	 * A non-forced unmount tries vflush() once.  A forced unmount
	 * retries, napping between attempts, until it succeeds or 30
	 * attempts have failed.
	 */
	do {
		error = vflush(mp, 1, flags, td);
		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
			(void) nfs_catnap(PSOCK, error, "newndm");
	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
	if (error)
		goto out;

	/*
	 * We are now committed to the unmount.
	 */
	if ((mntflags & MNT_FORCE) == 0)
		nfscl_umount(nmp, td);
	else {
		/* nfscl_umount() already ran above; just flag the mount. */
		mtx_lock(&nmp->nm_mtx);
		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
		mtx_unlock(&nmp->nm_mtx);
	}
	/* Make sure no nfsiods are assigned to this mount. */
	NFSLOCKIOD();
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (ncl_iodmount[i] == nmp) {
			ncl_iodwant[i] = NFSIOD_AVAILABLE;
			ncl_iodmount[i] = NULL;
		}
	NFSUNLOCKIOD();

	/*
	 * We can now set mnt_data to NULL and wait for
	 * nfssvc(NFSSVC_FORCEDISM) to complete.
	 */
	/*
	 * mnt_data is cleared with both mountlist_mtx and nm_mtx held;
	 * nm_mtx stays held across the sleep loop that waits for
	 * NFSMNTP_CANCELRPCS to clear.
	 */
	mtx_lock(&mountlist_mtx);
	mtx_lock(&nmp->nm_mtx);
	mp->mnt_data = NULL;
	mtx_unlock(&mountlist_mtx);
	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
	mtx_unlock(&nmp->nm_mtx);

	/* Tear down the connection and release everything the mount owns. */
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	free(nmp->nm_nam, M_SONAME);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	/*
	 * Free every session entry; only entries other than the first have
	 * their socket disconnected here.
	 */
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
		    dsp->nfsclds_sockp != NULL)
			newnfs_disconnect(dsp->nfsclds_sockp);
		nfscl_freenfsclds(dsp);
	}
	free(nmp, M_NEWNFSMNT);
out:
	return (error);
}
1755 
1756 /*
1757  * Return root of a filesystem
1758  */
1759 static int
1760 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1761 {
1762 	struct vnode *vp;
1763 	struct nfsmount *nmp;
1764 	struct nfsnode *np;
1765 	int error;
1766 
1767 	nmp = VFSTONFS(mp);
1768 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1769 	if (error)
1770 		return error;
1771 	vp = NFSTOV(np);
1772 	/*
1773 	 * Get transfer parameters and attributes for root vnode once.
1774 	 */
1775 	mtx_lock(&nmp->nm_mtx);
1776 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1777 		mtx_unlock(&nmp->nm_mtx);
1778 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1779 	} else
1780 		mtx_unlock(&nmp->nm_mtx);
1781 	if (vp->v_type == VNON)
1782 	    vp->v_type = VDIR;
1783 	vp->v_vflag |= VV_ROOT;
1784 	*vpp = vp;
1785 	return (0);
1786 }
1787 
1788 /*
1789  * Flush out the buffer cache
1790  */
1791 /* ARGSUSED */
1792 static int
1793 nfs_sync(struct mount *mp, int waitfor)
1794 {
1795 	struct vnode *vp, *mvp;
1796 	struct thread *td;
1797 	int error, allerror = 0;
1798 
1799 	td = curthread;
1800 
1801 	MNT_ILOCK(mp);
1802 	/*
1803 	 * If a forced dismount is in progress, return from here so that
1804 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1805 	 * calling VFS_UNMOUNT().
1806 	 */
1807 	if (NFSCL_FORCEDISM(mp)) {
1808 		MNT_IUNLOCK(mp);
1809 		return (EBADF);
1810 	}
1811 	MNT_IUNLOCK(mp);
1812 
1813 	/*
1814 	 * Force stale buffer cache information to be flushed.
1815 	 */
1816 loop:
1817 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1818 		/* XXX Racy bv_cnt check. */
1819 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1820 		    waitfor == MNT_LAZY) {
1821 			VI_UNLOCK(vp);
1822 			continue;
1823 		}
1824 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1825 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1826 			goto loop;
1827 		}
1828 		error = VOP_FSYNC(vp, waitfor, td);
1829 		if (error)
1830 			allerror = error;
1831 		NFSVOPUNLOCK(vp);
1832 		vrele(vp);
1833 	}
1834 	return (allerror);
1835 }
1836 
1837 static int
1838 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1839 {
1840 	struct nfsmount *nmp = VFSTONFS(mp);
1841 	struct vfsquery vq;
1842 	int error;
1843 
1844 	bzero(&vq, sizeof(vq));
1845 	switch (op) {
1846 #if 0
1847 	case VFS_CTL_NOLOCKS:
1848 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1849  		if (req->oldptr != NULL) {
1850  			error = SYSCTL_OUT(req, &val, sizeof(val));
1851  			if (error)
1852  				return (error);
1853  		}
1854  		if (req->newptr != NULL) {
1855  			error = SYSCTL_IN(req, &val, sizeof(val));
1856  			if (error)
1857  				return (error);
1858 			if (val)
1859 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1860 			else
1861 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1862  		}
1863 		break;
1864 #endif
1865 	case VFS_CTL_QUERY:
1866 		mtx_lock(&nmp->nm_mtx);
1867 		if (nmp->nm_state & NFSSTA_TIMEO)
1868 			vq.vq_flags |= VQ_NOTRESP;
1869 		mtx_unlock(&nmp->nm_mtx);
1870 #if 0
1871 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1872 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1873 			vq.vq_flags |= VQ_NOTRESPLOCK;
1874 #endif
1875 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1876 		break;
1877  	case VFS_CTL_TIMEO:
1878  		if (req->oldptr != NULL) {
1879  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1880  			    sizeof(nmp->nm_tprintf_initial_delay));
1881  			if (error)
1882  				return (error);
1883  		}
1884  		if (req->newptr != NULL) {
1885 			error = vfs_suser(mp, req->td);
1886 			if (error)
1887 				return (error);
1888  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1889  			    sizeof(nmp->nm_tprintf_initial_delay));
1890  			if (error)
1891  				return (error);
1892  			if (nmp->nm_tprintf_initial_delay < 0)
1893  				nmp->nm_tprintf_initial_delay = 0;
1894  		}
1895 		break;
1896 	default:
1897 		return (ENOTSUP);
1898 	}
1899 	return (0);
1900 }
1901 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.  The result of the cancel request is not examined.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1914 
1915 /*
1916  * Extract the information needed by the nlm from the nfs vnode.
1917  */
1918 static void
1919 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1920     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1921     struct timeval *timeop)
1922 {
1923 	struct nfsmount *nmp;
1924 	struct nfsnode *np = VTONFS(vp);
1925 
1926 	nmp = VFSTONFS(vp->v_mount);
1927 	if (fhlenp != NULL)
1928 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1929 	if (fhp != NULL)
1930 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1931 	if (sp != NULL)
1932 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1933 	if (is_v3p != NULL)
1934 		*is_v3p = NFS_ISV3(vp);
1935 	if (sizep != NULL)
1936 		*sizep = np->n_size;
1937 	if (timeop != NULL) {
1938 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1939 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1940 	}
1941 }
1942 
/*
 * Append the option name opt to the string being built at *buf, but only
 * when testval is non-zero.
 *
 * The name is added only if it fits in the *blen bytes that are left,
 * terminating NUL included; *buf and *blen are then moved past it.
 * Otherwise nothing is touched.  nmp is unused.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	int written;

	if (testval == 0)
		return;
	if (*blen <= strlen(opt))
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* There was room for all of opt, so a mismatch is not expected. */
	if (written != strlen(opt))
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1960 
/*
 * Append "<opt>=<optval>" to the string being built at *buf.
 *
 * *buf points at the current end of the string and *blen is the space
 * left there, terminating NUL included.  When the whole entry fits, *buf
 * and *blen are moved past it.  When it does not fit, the entry is
 * dropped entirely and *buf and *blen are left alone, so that a clipped
 * number is never reported as the value of the option.  nmp is unused.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/*
	 * Cheap early-out when not even "<opt>=" fits; the snprintf()
	 * return value below is the real test.
	 */
	if (*blen > strlen(opt) + 1) {
		len = snprintf(*buf, *blen, "%s=%d", opt, optval);
		if (len >= 0 && (size_t)len < *blen) {
			*buf += len;
			*blen -= len;
		} else {
			/*
			 * The output was truncated.  Put the terminator
			 * back where the string ended before the call, so
			 * that the partial entry does not show up.
			 */
			**buf = '\0';
		}
	}
}
1978 
1979 /*
1980  * Load the option flags and values into the buffer.
1981  */
1982 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1983 {
1984 	char *buf;
1985 	size_t blen;
1986 
1987 	buf = buffer;
1988 	blen = buflen;
1989 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1990 	    &blen);
1991 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1992 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1993 		    &blen);
1994 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1995 		    &buf, &blen);
1996 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
1997 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
1998 	}
1999 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
2000 	    &blen);
2001 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
2002 	    "nfsv2", &buf, &blen);
2003 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
2004 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
2005 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
2006 	    &buf, &blen);
2007 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
2008 	    &buf, &blen);
2009 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
2010 	    &blen);
2011 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2012 	    &blen);
2013 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2014 	    &blen);
2015 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2016 	    &blen);
2017 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2018 	    &blen);
2019 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2020 	    ",noncontigwr", &buf, &blen);
2021 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2022 	    0, ",lockd", &buf, &blen);
2023 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2024 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
2025 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2026 	    &buf, &blen);
2027 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2028 	    &buf, &blen);
2029 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2030 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2031 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2032 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2033 	    &buf, &blen);
2034 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2035 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2036 	    &buf, &blen);
2037 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2038 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2039 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2040 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2041 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2042 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2043 	    &blen);
2044 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2045 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2046 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2047 	    &blen);
2048 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2049 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2050 	    &blen);
2051 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2052 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2053 }
2054 
2055