xref: /freebsd/sys/fs/nfsclient/nfs_clvfsops.c (revision 38069501)
1 /*-
2  * Copyright (c) 1989, 1993, 1995
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
33  */
34 
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37 
38 
39 #include "opt_bootp.h"
40 #include "opt_nfsroot.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/bio.h>
46 #include <sys/buf.h>
47 #include <sys/clock.h>
48 #include <sys/jail.h>
49 #include <sys/limits.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/module.h>
54 #include <sys/mount.h>
55 #include <sys/proc.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/sockio.h>
59 #include <sys/sysctl.h>
60 #include <sys/vnode.h>
61 #include <sys/signalvar.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 #include <vm/uma.h>
66 
67 #include <net/if.h>
68 #include <net/route.h>
69 #include <netinet/in.h>
70 
71 #include <fs/nfs/nfsport.h>
72 #include <fs/nfsclient/nfsnode.h>
73 #include <fs/nfsclient/nfsmount.h>
74 #include <fs/nfsclient/nfs.h>
75 #include <nfs/nfsdiskless.h>
76 
77 FEATURE(nfscl, "NFSv4 client");
78 
79 extern int nfscl_ticks;
80 extern struct timeval nfsboottime;
81 extern int nfsrv_useacl;
82 extern int nfscl_debuglevel;
83 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
84 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
85 extern struct mtx ncl_iod_mutex;
86 NFSCLSTATEMUTEX;
87 
88 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
89 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
90 
91 SYSCTL_DECL(_vfs_nfs);
92 static int nfs_ip_paranoia = 1;
93 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
94     &nfs_ip_paranoia, 0, "");
95 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
96 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
97         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
98 /* how long between console messages "nfs server foo not responding" */
99 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
100 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
101         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
102 #ifdef NFS_DEBUG
103 int nfs_debug;
104 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
105     "Toggle debug flag");
106 #endif
107 
108 static int	nfs_mountroot(struct mount *);
109 static void	nfs_sec_name(char *, int *);
110 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
111 		    struct nfs_args *argp, const char *, struct ucred *,
112 		    struct thread *);
113 static int	mountnfs(struct nfs_args *, struct mount *,
114 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
115 		    u_char *, int, struct vnode **, struct ucred *,
116 		    struct thread *, int, int, int);
117 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
118 		    struct sockaddr_storage *, int *, off_t *,
119 		    struct timeval *);
120 static vfs_mount_t nfs_mount;
121 static vfs_cmount_t nfs_cmount;
122 static vfs_unmount_t nfs_unmount;
123 static vfs_root_t nfs_root;
124 static vfs_statfs_t nfs_statfs;
125 static vfs_sync_t nfs_sync;
126 static vfs_sysctl_t nfs_sysctl;
127 static vfs_purge_t nfs_purge;
128 
129 /*
130  * nfs vfs operations.
131  */
132 static struct vfsops nfs_vfsops = {
133 	.vfs_init =		ncl_init,
134 	.vfs_mount =		nfs_mount,
135 	.vfs_cmount =		nfs_cmount,
136 	.vfs_root =		nfs_root,
137 	.vfs_statfs =		nfs_statfs,
138 	.vfs_sync =		nfs_sync,
139 	.vfs_uninit =		ncl_uninit,
140 	.vfs_unmount =		nfs_unmount,
141 	.vfs_sysctl =		nfs_sysctl,
142 	.vfs_purge =		nfs_purge,
143 };
144 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
145 
146 /* So that loader and kldload(2) can find us, wherever we are.. */
147 MODULE_VERSION(nfs, 1);
148 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
149 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
150 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
151 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
152 
153 /*
154  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
155  * can be shared by both NFS clients. It is declared here so that it
156  * will be defined for kernels built without NFS_ROOT, although it
157  * isn't used in that case.
158  */
159 #if !defined(NFS_ROOT)
160 struct nfs_diskless	nfs_diskless = { { { 0 } } };
161 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
162 int			nfs_diskless_valid = 0;
163 #endif
164 
165 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
166     &nfs_diskless_valid, 0,
167     "Has the diskless struct been filled correctly");
168 
169 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
170     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
171 
172 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
173     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
174     "%Ssockaddr_in", "Diskless root nfs address");
175 
176 
177 void		newnfsargs_ntoh(struct nfs_args *);
178 static int	nfs_mountdiskless(char *,
179 		    struct sockaddr_in *, struct nfs_args *,
180 		    struct thread *, struct vnode **, struct mount *);
181 static void	nfs_convert_diskless(void);
182 static void	nfs_convert_oargs(struct nfs_args *args,
183 		    struct onfs_args *oargs);
184 
185 int
186 newnfs_iosize(struct nfsmount *nmp)
187 {
188 	int iosize, maxio;
189 
190 	/* First, set the upper limit for iosize */
191 	if (nmp->nm_flag & NFSMNT_NFSV4) {
192 		maxio = NFS_MAXBSIZE;
193 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
194 		if (nmp->nm_sotype == SOCK_DGRAM)
195 			maxio = NFS_MAXDGRAMDATA;
196 		else
197 			maxio = NFS_MAXBSIZE;
198 	} else {
199 		maxio = NFS_V2MAXDATA;
200 	}
201 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
202 		nmp->nm_rsize = maxio;
203 	if (nmp->nm_rsize > NFS_MAXBSIZE)
204 		nmp->nm_rsize = NFS_MAXBSIZE;
205 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
206 		nmp->nm_readdirsize = maxio;
207 	if (nmp->nm_readdirsize > nmp->nm_rsize)
208 		nmp->nm_readdirsize = nmp->nm_rsize;
209 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
210 		nmp->nm_wsize = maxio;
211 	if (nmp->nm_wsize > NFS_MAXBSIZE)
212 		nmp->nm_wsize = NFS_MAXBSIZE;
213 
214 	/*
215 	 * Calculate the size used for io buffers.  Use the larger
216 	 * of the two sizes to minimise nfs requests but make sure
217 	 * that it is at least one VM page to avoid wasting buffer
218 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
219 	 * that is the buffer size used for directories.
220 	 */
221 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
222 	iosize = imax(iosize, PAGE_SIZE);
223 	iosize = imax(iosize, NFS_DIRBLKSIZ);
224 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
225 	return (iosize);
226 }
227 
228 static void
229 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
230 {
231 
232 	args->version = NFS_ARGSVERSION;
233 	args->addr = oargs->addr;
234 	args->addrlen = oargs->addrlen;
235 	args->sotype = oargs->sotype;
236 	args->proto = oargs->proto;
237 	args->fh = oargs->fh;
238 	args->fhsize = oargs->fhsize;
239 	args->flags = oargs->flags;
240 	args->wsize = oargs->wsize;
241 	args->rsize = oargs->rsize;
242 	args->readdirsize = oargs->readdirsize;
243 	args->timeo = oargs->timeo;
244 	args->retrans = oargs->retrans;
245 	args->readahead = oargs->readahead;
246 	args->hostname = oargs->hostname;
247 }
248 
249 static void
250 nfs_convert_diskless(void)
251 {
252 
253 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
254 		sizeof(struct ifaliasreq));
255 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
256 		sizeof(struct sockaddr_in));
257 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
258 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
259 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
260 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
261 	} else {
262 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
263 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
264 	}
265 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
266 		sizeof(struct sockaddr_in));
267 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
268 	nfsv3_diskless.root_time = nfs_diskless.root_time;
269 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
270 		MAXHOSTNAMELEN);
271 	nfs_diskless_valid = 3;
272 }
273 
274 /*
275  * nfs statfs call
276  */
277 static int
278 nfs_statfs(struct mount *mp, struct statfs *sbp)
279 {
280 	struct vnode *vp;
281 	struct thread *td;
282 	struct nfsmount *nmp = VFSTONFS(mp);
283 	struct nfsvattr nfsva;
284 	struct nfsfsinfo fs;
285 	struct nfsstatfs sb;
286 	int error = 0, attrflag, gotfsinfo = 0, ret;
287 	struct nfsnode *np;
288 
289 	td = curthread;
290 
291 	error = vfs_busy(mp, MBF_NOWAIT);
292 	if (error)
293 		return (error);
294 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
295 	if (error) {
296 		vfs_unbusy(mp);
297 		return (error);
298 	}
299 	vp = NFSTOV(np);
300 	mtx_lock(&nmp->nm_mtx);
301 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
302 		mtx_unlock(&nmp->nm_mtx);
303 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
304 		    &attrflag, NULL);
305 		if (!error)
306 			gotfsinfo = 1;
307 	} else
308 		mtx_unlock(&nmp->nm_mtx);
309 	if (!error)
310 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
311 		    &attrflag, NULL);
312 	if (error != 0)
313 		NFSCL_DEBUG(2, "statfs=%d\n", error);
314 	if (attrflag == 0) {
315 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
316 		    td->td_ucred, td, &nfsva, NULL, NULL);
317 		if (ret) {
318 			/*
319 			 * Just set default values to get things going.
320 			 */
321 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
322 			nfsva.na_vattr.va_type = VDIR;
323 			nfsva.na_vattr.va_mode = 0777;
324 			nfsva.na_vattr.va_nlink = 100;
325 			nfsva.na_vattr.va_uid = (uid_t)0;
326 			nfsva.na_vattr.va_gid = (gid_t)0;
327 			nfsva.na_vattr.va_fileid = 2;
328 			nfsva.na_vattr.va_gen = 1;
329 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
330 			nfsva.na_vattr.va_size = 512 * 1024;
331 		}
332 	}
333 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
334 	if (!error) {
335 	    mtx_lock(&nmp->nm_mtx);
336 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
337 		nfscl_loadfsinfo(nmp, &fs);
338 	    nfscl_loadsbinfo(nmp, &sb, sbp);
339 	    sbp->f_iosize = newnfs_iosize(nmp);
340 	    mtx_unlock(&nmp->nm_mtx);
341 	    if (sbp != &mp->mnt_stat) {
342 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
343 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
344 	    }
345 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
346 	} else if (NFS_ISV4(vp)) {
347 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
348 	}
349 	vput(vp);
350 	vfs_unbusy(mp);
351 	return (error);
352 }
353 
354 /*
355  * nfs version 3 fsinfo rpc call
356  */
357 int
358 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
359     struct thread *td)
360 {
361 	struct nfsfsinfo fs;
362 	struct nfsvattr nfsva;
363 	int error, attrflag;
364 
365 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
366 	if (!error) {
367 		if (attrflag)
368 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
369 			    1);
370 		mtx_lock(&nmp->nm_mtx);
371 		nfscl_loadfsinfo(nmp, &fs);
372 		mtx_unlock(&nmp->nm_mtx);
373 	}
374 	return (error);
375 }
376 
377 /*
378  * Mount a remote root fs via. nfs. This depends on the info in the
379  * nfs_diskless structure that has been filled in properly by some primary
380  * bootstrap.
381  * It goes something like this:
382  * - do enough of "ifconfig" by calling ifioctl() so that the system
383  *   can talk to the server
384  * - If nfs_diskless.mygateway is filled in, use that address as
385  *   a default gateway.
386  * - build the rootfs mount point and call mountnfs() to do the rest.
387  *
388  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
389  * structure, as well as other global NFS client variables here, as
390  * nfs_mountroot() will be called once in the boot before any other NFS
391  * client activity occurs.
392  */
393 static int
394 nfs_mountroot(struct mount *mp)
395 {
396 	struct thread *td = curthread;
397 	struct nfsv3_diskless *nd = &nfsv3_diskless;
398 	struct socket *so;
399 	struct vnode *vp;
400 	struct ifreq ir;
401 	int error;
402 	u_long l;
403 	char buf[128];
404 	char *cp;
405 
406 #if defined(BOOTP_NFSROOT) && defined(BOOTP)
407 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
408 #elif defined(NFS_ROOT)
409 	nfs_setup_diskless();
410 #endif
411 
412 	if (nfs_diskless_valid == 0)
413 		return (-1);
414 	if (nfs_diskless_valid == 1)
415 		nfs_convert_diskless();
416 
417 	/*
418 	 * Do enough of ifconfig(8) so that the critical net interface can
419 	 * talk to the server.
420 	 */
421 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
422 	    td->td_ucred, td);
423 	if (error)
424 		panic("nfs_mountroot: socreate(%04x): %d",
425 			nd->myif.ifra_addr.sa_family, error);
426 
427 #if 0 /* XXX Bad idea */
428 	/*
429 	 * We might not have been told the right interface, so we pass
430 	 * over the first ten interfaces of the same kind, until we get
431 	 * one of them configured.
432 	 */
433 
434 	for (i = strlen(nd->myif.ifra_name) - 1;
435 		nd->myif.ifra_name[i] >= '0' &&
436 		nd->myif.ifra_name[i] <= '9';
437 		nd->myif.ifra_name[i] ++) {
438 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
439 		if(!error)
440 			break;
441 	}
442 #endif
443 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
444 	if (error)
445 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
446 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
447 		ir.ifr_mtu = strtol(cp, NULL, 10);
448 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
449 		freeenv(cp);
450 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
451 		if (error)
452 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
453 	}
454 	soclose(so);
455 
456 	/*
457 	 * If the gateway field is filled in, set it as the default route.
458 	 * Note that pxeboot will set a default route of 0 if the route
459 	 * is not set by the DHCP server.  Check also for a value of 0
460 	 * to avoid panicking inappropriately in that situation.
461 	 */
462 	if (nd->mygateway.sin_len != 0 &&
463 	    nd->mygateway.sin_addr.s_addr != 0) {
464 		struct sockaddr_in mask, sin;
465 
466 		bzero((caddr_t)&mask, sizeof(mask));
467 		sin = mask;
468 		sin.sin_family = AF_INET;
469 		sin.sin_len = sizeof(sin);
470                 /* XXX MRT use table 0 for this sort of thing */
471 		CURVNET_SET(TD_TO_VNET(td));
472 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
473 		    (struct sockaddr *)&nd->mygateway,
474 		    (struct sockaddr *)&mask,
475 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
476 		CURVNET_RESTORE();
477 		if (error)
478 			panic("nfs_mountroot: RTM_ADD: %d", error);
479 	}
480 
481 	/*
482 	 * Create the rootfs mount point.
483 	 */
484 	nd->root_args.fh = nd->root_fh;
485 	nd->root_args.fhsize = nd->root_fhsize;
486 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
487 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
488 		(l >> 24) & 0xff, (l >> 16) & 0xff,
489 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
490 	printf("NFS ROOT: %s\n", buf);
491 	nd->root_args.hostname = buf;
492 	if ((error = nfs_mountdiskless(buf,
493 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
494 		return (error);
495 	}
496 
497 	/*
498 	 * This is not really an nfs issue, but it is much easier to
499 	 * set hostname here and then let the "/etc/rc.xxx" files
500 	 * mount the right /var based upon its preset value.
501 	 */
502 	mtx_lock(&prison0.pr_mtx);
503 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
504 	    sizeof(prison0.pr_hostname));
505 	mtx_unlock(&prison0.pr_mtx);
506 	inittodr(ntohl(nd->root_time));
507 	return (0);
508 }
509 
510 /*
511  * Internal version of mount system call for diskless setup.
512  */
513 static int
514 nfs_mountdiskless(char *path,
515     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
516     struct vnode **vpp, struct mount *mp)
517 {
518 	struct sockaddr *nam;
519 	int dirlen, error;
520 	char *dirpath;
521 
522 	/*
523 	 * Find the directory path in "path", which also has the server's
524 	 * name/ip address in it.
525 	 */
526 	dirpath = strchr(path, ':');
527 	if (dirpath != NULL)
528 		dirlen = strlen(++dirpath);
529 	else
530 		dirlen = 0;
531 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
532 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
533 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
534 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
535 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
536 		return (error);
537 	}
538 	return (0);
539 }
540 
541 static void
542 nfs_sec_name(char *sec, int *flagsp)
543 {
544 	if (!strcmp(sec, "krb5"))
545 		*flagsp |= NFSMNT_KERB;
546 	else if (!strcmp(sec, "krb5i"))
547 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
548 	else if (!strcmp(sec, "krb5p"))
549 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
550 }
551 
552 static void
553 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
554     const char *hostname, struct ucred *cred, struct thread *td)
555 {
556 	int adjsock;
557 	char *p;
558 
559 	/*
560 	 * Set read-only flag if requested; otherwise, clear it if this is
561 	 * an update.  If this is not an update, then either the read-only
562 	 * flag is already clear, or this is a root mount and it was set
563 	 * intentionally at some previous point.
564 	 */
565 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
566 		MNT_ILOCK(mp);
567 		mp->mnt_flag |= MNT_RDONLY;
568 		MNT_IUNLOCK(mp);
569 	} else if (mp->mnt_flag & MNT_UPDATE) {
570 		MNT_ILOCK(mp);
571 		mp->mnt_flag &= ~MNT_RDONLY;
572 		MNT_IUNLOCK(mp);
573 	}
574 
575 	/*
576 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
577 	 * no sense in that context.  Also, set up appropriate retransmit
578 	 * and soft timeout behavior.
579 	 */
580 	if (argp->sotype == SOCK_STREAM) {
581 		nmp->nm_flag &= ~NFSMNT_NOCONN;
582 		nmp->nm_timeo = NFS_MAXTIMEO;
583 		if ((argp->flags & NFSMNT_NFSV4) != 0)
584 			nmp->nm_retry = INT_MAX;
585 		else
586 			nmp->nm_retry = NFS_RETRANS_TCP;
587 	}
588 
589 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
590 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
591 		argp->flags &= ~NFSMNT_RDIRPLUS;
592 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
593 	}
594 
595 	/* Clear ONEOPENOWN for NFSv2, 3 and 4.0. */
596 	if (nmp->nm_minorvers == 0) {
597 		argp->flags &= ~NFSMNT_ONEOPENOWN;
598 		nmp->nm_flag &= ~NFSMNT_ONEOPENOWN;
599 	}
600 
601 	/* Re-bind if rsrvd port requested and wasn't on one */
602 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
603 		  && (argp->flags & NFSMNT_RESVPORT);
604 	/* Also re-bind if we're switching to/from a connected UDP socket */
605 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
606 		    (argp->flags & NFSMNT_NOCONN));
607 
608 	/* Update flags atomically.  Don't change the lock bits. */
609 	nmp->nm_flag = argp->flags | nmp->nm_flag;
610 
611 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
612 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
613 		if (nmp->nm_timeo < NFS_MINTIMEO)
614 			nmp->nm_timeo = NFS_MINTIMEO;
615 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
616 			nmp->nm_timeo = NFS_MAXTIMEO;
617 	}
618 
619 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
620 		nmp->nm_retry = argp->retrans;
621 		if (nmp->nm_retry > NFS_MAXREXMIT)
622 			nmp->nm_retry = NFS_MAXREXMIT;
623 	}
624 
625 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
626 		nmp->nm_wsize = argp->wsize;
627 		/*
628 		 * Clip at the power of 2 below the size. There is an
629 		 * issue (not isolated) that causes intermittent page
630 		 * faults if this is not done.
631 		 */
632 		if (nmp->nm_wsize > NFS_FABLKSIZE)
633 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
634 		else
635 			nmp->nm_wsize = NFS_FABLKSIZE;
636 	}
637 
638 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
639 		nmp->nm_rsize = argp->rsize;
640 		/*
641 		 * Clip at the power of 2 below the size. There is an
642 		 * issue (not isolated) that causes intermittent page
643 		 * faults if this is not done.
644 		 */
645 		if (nmp->nm_rsize > NFS_FABLKSIZE)
646 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
647 		else
648 			nmp->nm_rsize = NFS_FABLKSIZE;
649 	}
650 
651 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
652 		nmp->nm_readdirsize = argp->readdirsize;
653 	}
654 
655 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
656 		nmp->nm_acregmin = argp->acregmin;
657 	else
658 		nmp->nm_acregmin = NFS_MINATTRTIMO;
659 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
660 		nmp->nm_acregmax = argp->acregmax;
661 	else
662 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
663 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
664 		nmp->nm_acdirmin = argp->acdirmin;
665 	else
666 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
667 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
668 		nmp->nm_acdirmax = argp->acdirmax;
669 	else
670 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
671 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
672 		nmp->nm_acdirmin = nmp->nm_acdirmax;
673 	if (nmp->nm_acregmin > nmp->nm_acregmax)
674 		nmp->nm_acregmin = nmp->nm_acregmax;
675 
676 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
677 		if (argp->readahead <= NFS_MAXRAHEAD)
678 			nmp->nm_readahead = argp->readahead;
679 		else
680 			nmp->nm_readahead = NFS_MAXRAHEAD;
681 	}
682 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
683 		if (argp->wcommitsize < nmp->nm_wsize)
684 			nmp->nm_wcommitsize = nmp->nm_wsize;
685 		else
686 			nmp->nm_wcommitsize = argp->wcommitsize;
687 	}
688 
689 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
690 		    (nmp->nm_soproto != argp->proto));
691 
692 	if (nmp->nm_client != NULL && adjsock) {
693 		int haslock = 0, error = 0;
694 
695 		if (nmp->nm_sotype == SOCK_STREAM) {
696 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
697 			if (!error)
698 				haslock = 1;
699 		}
700 		if (!error) {
701 		    newnfs_disconnect(&nmp->nm_sockreq);
702 		    if (haslock)
703 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
704 		    nmp->nm_sotype = argp->sotype;
705 		    nmp->nm_soproto = argp->proto;
706 		    if (nmp->nm_sotype == SOCK_DGRAM)
707 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
708 			    cred, td, 0)) {
709 				printf("newnfs_args: retrying connect\n");
710 				(void) nfs_catnap(PSOCK, 0, "nfscon");
711 			}
712 		}
713 	} else {
714 		nmp->nm_sotype = argp->sotype;
715 		nmp->nm_soproto = argp->proto;
716 	}
717 
718 	if (hostname != NULL) {
719 		strlcpy(nmp->nm_hostname, hostname,
720 		    sizeof(nmp->nm_hostname));
721 		p = strchr(nmp->nm_hostname, ':');
722 		if (p != NULL)
723 			*p = '\0';
724 	}
725 }
726 
/*
 * NULL-terminated list of the mount option names this file system
 * accepts; nfs_mount() passes it to vfs_filteropt().
 */
static const char *nfs_opts[] = {
	"from", "nfs_args",
	"noac", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel", "acls", "force",
	"update", "async",
	"noconn", "nolockd", "conn", "lockd", "intr", "rdirplus",
	"readdirsize", "soft", "hard",
	"mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans",
	"actimeo", "acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo", "timeout",
	"addr", "fh",
	"nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname",
	"dirpath", "minorversion", "nametimeo", "negnametimeo",
	"nocto", "noncontigwr", "pnfs", "wcommitsize", "oneopenown",
	NULL
};
738 
739 /*
740  * Parse the "from" mountarg, passed by the generic mount(8) program
741  * or the mountroot code.  This is used when rerooting into NFS.
742  *
743  * Note that the "hostname" is actually a "hostname:/share/path" string.
744  */
745 static int
746 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
747     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
748 {
749 	char *nam, *delimp, *hostp, *spec;
750 	int error, have_bracket = 0, offset, rv, speclen;
751 	struct sockaddr_in *sin;
752 	size_t len;
753 
754 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
755 	if (error != 0)
756 		return (error);
757 	nam = malloc(MNAMELEN + 1, M_TEMP, M_WAITOK);
758 
759 	/*
760 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
761 	 */
762 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
763 	    *(delimp + 1) == ':') {
764 		hostp = spec + 1;
765 		spec = delimp + 2;
766 		have_bracket = 1;
767 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
768 		hostp = spec;
769 		spec = delimp + 1;
770 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
771 		printf("%s: path@server syntax is deprecated, "
772 		    "use server:path\n", __func__);
773 		hostp = delimp + 1;
774 	} else {
775 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
776 		free(nam, M_TEMP);
777 		return (EINVAL);
778 	}
779 	*delimp = '\0';
780 
781 	/*
782 	 * If there has been a trailing slash at mounttime it seems
783 	 * that some mountd implementations fail to remove the mount
784 	 * entries from their mountlist while unmounting.
785 	 */
786 	for (speclen = strlen(spec);
787 	    speclen > 1 && spec[speclen - 1] == '/';
788 	    speclen--)
789 		spec[speclen - 1] = '\0';
790 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
791 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
792 		free(nam, M_TEMP);
793 		return (EINVAL);
794 	}
795 	/* Make both '@' and ':' notations equal */
796 	if (*hostp != '\0') {
797 		len = strlen(hostp);
798 		offset = 0;
799 		if (have_bracket)
800 			nam[offset++] = '[';
801 		memmove(nam + offset, hostp, len);
802 		if (have_bracket)
803 			nam[len + offset++] = ']';
804 		nam[len + offset++] = ':';
805 		memmove(nam + len + offset, spec, speclen);
806 		nam[len + speclen + offset] = '\0';
807 	} else
808 		nam[0] = '\0';
809 
810 	/*
811 	 * XXX: IPv6
812 	 */
813 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
814 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
815 	if (rv != 1) {
816 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
817 		    __func__, hostp, rv);
818 		free(nam, M_TEMP);
819 		free(sin, M_SONAME);
820 		return (EINVAL);
821 	}
822 
823 	sin->sin_len = sizeof(*sin);
824 	sin->sin_family = AF_INET;
825 	/*
826 	 * XXX: hardcoded port number.
827 	 */
828 	sin->sin_port = htons(2049);
829 
830 	*hostnamep = strdup(nam, M_NEWNFSMNT);
831 	*sinp = sin;
832 	strlcpy(dirpath, spec, dirpathsize);
833 	*dirlenp = strlen(dirpath);
834 
835 	free(nam, M_TEMP);
836 	return (0);
837 }
838 
839 /*
840  * VFS Operations.
841  *
842  * mount system call
843  * It seems a bit dumb to copyinstr() the host and path here and then
844  * bcopy() them in mountnfs(), but I wanted to detect errors before
845  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
846  * an error after that means that I have to release the mbuf.
847  */
848 /* ARGSUSED */
849 static int
850 nfs_mount(struct mount *mp)
851 {
852 	struct nfs_args args = {
853 	    .version = NFS_ARGSVERSION,
854 	    .addr = NULL,
855 	    .addrlen = sizeof (struct sockaddr_in),
856 	    .sotype = SOCK_STREAM,
857 	    .proto = 0,
858 	    .fh = NULL,
859 	    .fhsize = 0,
860 	    .flags = NFSMNT_RESVPORT,
861 	    .wsize = NFS_WSIZE,
862 	    .rsize = NFS_RSIZE,
863 	    .readdirsize = NFS_READDIRSIZE,
864 	    .timeo = 10,
865 	    .retrans = NFS_RETRANS,
866 	    .readahead = NFS_DEFRAHEAD,
867 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
868 	    .hostname = NULL,
869 	    .acregmin = NFS_MINATTRTIMO,
870 	    .acregmax = NFS_MAXATTRTIMO,
871 	    .acdirmin = NFS_MINDIRATTRTIMO,
872 	    .acdirmax = NFS_MAXDIRATTRTIMO,
873 	};
874 	int error = 0, ret, len;
875 	struct sockaddr *nam = NULL;
876 	struct vnode *vp;
877 	struct thread *td;
878 	char *hst;
879 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
880 	char *cp, *opt, *name, *secname;
881 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
882 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
883 	int minvers = 0;
884 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
885 	    krbnamelen, srvkrbnamelen;
886 	size_t hstlen;
887 
888 	has_nfs_args_opt = 0;
889 	has_nfs_from_opt = 0;
890 	hst = malloc(MNAMELEN, M_TEMP, M_WAITOK);
891 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
892 		error = EINVAL;
893 		goto out;
894 	}
895 
896 	td = curthread;
897 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
898 	    nfs_diskless_valid != 0) {
899 		error = nfs_mountroot(mp);
900 		goto out;
901 	}
902 
903 	nfscl_init();
904 
905 	/*
906 	 * The old mount_nfs program passed the struct nfs_args
907 	 * from userspace to kernel.  The new mount_nfs program
908 	 * passes string options via nmount() from userspace to kernel
909 	 * and we populate the struct nfs_args in the kernel.
910 	 */
911 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
912 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
913 		    sizeof(args));
914 		if (error != 0)
915 			goto out;
916 
917 		if (args.version != NFS_ARGSVERSION) {
918 			error = EPROGMISMATCH;
919 			goto out;
920 		}
921 		has_nfs_args_opt = 1;
922 	}
923 
924 	/* Handle the new style options. */
925 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
926 		args.acdirmin = args.acdirmax =
927 		    args.acregmin = args.acregmax = 0;
928 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
929 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
930 	}
931 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
932 		args.flags |= NFSMNT_NOCONN;
933 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
934 		args.flags &= ~NFSMNT_NOCONN;
935 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
936 		args.flags |= NFSMNT_NOLOCKD;
937 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
938 		args.flags &= ~NFSMNT_NOLOCKD;
939 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
940 		args.flags |= NFSMNT_INT;
941 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
942 		args.flags |= NFSMNT_RDIRPLUS;
943 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
944 		args.flags |= NFSMNT_RESVPORT;
945 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
946 		args.flags &= ~NFSMNT_RESVPORT;
947 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
948 		args.flags |= NFSMNT_SOFT;
949 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
950 		args.flags &= ~NFSMNT_SOFT;
951 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
952 		args.sotype = SOCK_DGRAM;
953 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
954 		args.sotype = SOCK_DGRAM;
955 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
956 		args.sotype = SOCK_STREAM;
957 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
958 		args.flags |= NFSMNT_NFSV3;
959 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
960 		args.flags |= NFSMNT_NFSV4;
961 		args.sotype = SOCK_STREAM;
962 	}
963 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
964 		args.flags |= NFSMNT_ALLGSSNAME;
965 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
966 		args.flags |= NFSMNT_NOCTO;
967 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
968 		args.flags |= NFSMNT_NONCONTIGWR;
969 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
970 		args.flags |= NFSMNT_PNFS;
971 	if (vfs_getopt(mp->mnt_optnew, "oneopenown", NULL, NULL) == 0)
972 		args.flags |= NFSMNT_ONEOPENOWN;
973 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
974 		if (opt == NULL) {
975 			vfs_mount_error(mp, "illegal readdirsize");
976 			error = EINVAL;
977 			goto out;
978 		}
979 		ret = sscanf(opt, "%d", &args.readdirsize);
980 		if (ret != 1 || args.readdirsize <= 0) {
981 			vfs_mount_error(mp, "illegal readdirsize: %s",
982 			    opt);
983 			error = EINVAL;
984 			goto out;
985 		}
986 		args.flags |= NFSMNT_READDIRSIZE;
987 	}
988 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
989 		if (opt == NULL) {
990 			vfs_mount_error(mp, "illegal readahead");
991 			error = EINVAL;
992 			goto out;
993 		}
994 		ret = sscanf(opt, "%d", &args.readahead);
995 		if (ret != 1 || args.readahead <= 0) {
996 			vfs_mount_error(mp, "illegal readahead: %s",
997 			    opt);
998 			error = EINVAL;
999 			goto out;
1000 		}
1001 		args.flags |= NFSMNT_READAHEAD;
1002 	}
1003 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1004 		if (opt == NULL) {
1005 			vfs_mount_error(mp, "illegal wsize");
1006 			error = EINVAL;
1007 			goto out;
1008 		}
1009 		ret = sscanf(opt, "%d", &args.wsize);
1010 		if (ret != 1 || args.wsize <= 0) {
1011 			vfs_mount_error(mp, "illegal wsize: %s",
1012 			    opt);
1013 			error = EINVAL;
1014 			goto out;
1015 		}
1016 		args.flags |= NFSMNT_WSIZE;
1017 	}
1018 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1019 		if (opt == NULL) {
1020 			vfs_mount_error(mp, "illegal rsize");
1021 			error = EINVAL;
1022 			goto out;
1023 		}
1024 		ret = sscanf(opt, "%d", &args.rsize);
1025 		if (ret != 1 || args.rsize <= 0) {
1026 			vfs_mount_error(mp, "illegal wsize: %s",
1027 			    opt);
1028 			error = EINVAL;
1029 			goto out;
1030 		}
1031 		args.flags |= NFSMNT_RSIZE;
1032 	}
1033 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1034 		if (opt == NULL) {
1035 			vfs_mount_error(mp, "illegal retrans");
1036 			error = EINVAL;
1037 			goto out;
1038 		}
1039 		ret = sscanf(opt, "%d", &args.retrans);
1040 		if (ret != 1 || args.retrans <= 0) {
1041 			vfs_mount_error(mp, "illegal retrans: %s",
1042 			    opt);
1043 			error = EINVAL;
1044 			goto out;
1045 		}
1046 		args.flags |= NFSMNT_RETRANS;
1047 	}
1048 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1049 		ret = sscanf(opt, "%d", &args.acregmin);
1050 		if (ret != 1 || args.acregmin < 0) {
1051 			vfs_mount_error(mp, "illegal actimeo: %s",
1052 			    opt);
1053 			error = EINVAL;
1054 			goto out;
1055 		}
1056 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1057 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1058 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1059 	}
1060 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1061 		ret = sscanf(opt, "%d", &args.acregmin);
1062 		if (ret != 1 || args.acregmin < 0) {
1063 			vfs_mount_error(mp, "illegal acregmin: %s",
1064 			    opt);
1065 			error = EINVAL;
1066 			goto out;
1067 		}
1068 		args.flags |= NFSMNT_ACREGMIN;
1069 	}
1070 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1071 		ret = sscanf(opt, "%d", &args.acregmax);
1072 		if (ret != 1 || args.acregmax < 0) {
1073 			vfs_mount_error(mp, "illegal acregmax: %s",
1074 			    opt);
1075 			error = EINVAL;
1076 			goto out;
1077 		}
1078 		args.flags |= NFSMNT_ACREGMAX;
1079 	}
1080 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1081 		ret = sscanf(opt, "%d", &args.acdirmin);
1082 		if (ret != 1 || args.acdirmin < 0) {
1083 			vfs_mount_error(mp, "illegal acdirmin: %s",
1084 			    opt);
1085 			error = EINVAL;
1086 			goto out;
1087 		}
1088 		args.flags |= NFSMNT_ACDIRMIN;
1089 	}
1090 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1091 		ret = sscanf(opt, "%d", &args.acdirmax);
1092 		if (ret != 1 || args.acdirmax < 0) {
1093 			vfs_mount_error(mp, "illegal acdirmax: %s",
1094 			    opt);
1095 			error = EINVAL;
1096 			goto out;
1097 		}
1098 		args.flags |= NFSMNT_ACDIRMAX;
1099 	}
1100 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1101 		ret = sscanf(opt, "%d", &args.wcommitsize);
1102 		if (ret != 1 || args.wcommitsize < 0) {
1103 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1104 			error = EINVAL;
1105 			goto out;
1106 		}
1107 		args.flags |= NFSMNT_WCOMMITSIZE;
1108 	}
1109 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1110 		ret = sscanf(opt, "%d", &args.timeo);
1111 		if (ret != 1 || args.timeo <= 0) {
1112 			vfs_mount_error(mp, "illegal timeo: %s",
1113 			    opt);
1114 			error = EINVAL;
1115 			goto out;
1116 		}
1117 		args.flags |= NFSMNT_TIMEO;
1118 	}
1119 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1120 		ret = sscanf(opt, "%d", &args.timeo);
1121 		if (ret != 1 || args.timeo <= 0) {
1122 			vfs_mount_error(mp, "illegal timeout: %s",
1123 			    opt);
1124 			error = EINVAL;
1125 			goto out;
1126 		}
1127 		args.flags |= NFSMNT_TIMEO;
1128 	}
1129 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1130 		ret = sscanf(opt, "%d", &nametimeo);
1131 		if (ret != 1 || nametimeo < 0) {
1132 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1133 			error = EINVAL;
1134 			goto out;
1135 		}
1136 	}
1137 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1138 	    == 0) {
1139 		ret = sscanf(opt, "%d", &negnametimeo);
1140 		if (ret != 1 || negnametimeo < 0) {
1141 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1142 			    opt);
1143 			error = EINVAL;
1144 			goto out;
1145 		}
1146 	}
1147 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1148 	    0) {
1149 		ret = sscanf(opt, "%d", &minvers);
1150 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1151 		    (args.flags & NFSMNT_NFSV4) == 0) {
1152 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1153 			error = EINVAL;
1154 			goto out;
1155 		}
1156 	}
1157 	if (vfs_getopt(mp->mnt_optnew, "sec",
1158 		(void **) &secname, NULL) == 0)
1159 		nfs_sec_name(secname, &args.flags);
1160 
1161 	if (mp->mnt_flag & MNT_UPDATE) {
1162 		struct nfsmount *nmp = VFSTONFS(mp);
1163 
1164 		if (nmp == NULL) {
1165 			error = EIO;
1166 			goto out;
1167 		}
1168 
1169 		/*
1170 		 * If a change from TCP->UDP is done and there are thread(s)
1171 		 * that have I/O RPC(s) in progress with a transfer size
1172 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1173 		 * hung, retrying the RPC(s) forever. Usually these threads
1174 		 * will be seen doing an uninterruptible sleep on wait channel
1175 		 * "nfsreq".
1176 		 */
1177 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1178 			tprintf(td->td_proc, LOG_WARNING,
1179 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1180 
1181 		/*
1182 		 * When doing an update, we can't change version,
1183 		 * security, switch lockd strategies, change cookie
1184 		 * translation or switch oneopenown.
1185 		 */
1186 		args.flags = (args.flags &
1187 		    ~(NFSMNT_NFSV3 |
1188 		      NFSMNT_NFSV4 |
1189 		      NFSMNT_KERB |
1190 		      NFSMNT_INTEGRITY |
1191 		      NFSMNT_PRIVACY |
1192 		      NFSMNT_ONEOPENOWN |
1193 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1194 		    (nmp->nm_flag &
1195 			(NFSMNT_NFSV3 |
1196 			 NFSMNT_NFSV4 |
1197 			 NFSMNT_KERB |
1198 			 NFSMNT_INTEGRITY |
1199 			 NFSMNT_PRIVACY |
1200 			 NFSMNT_ONEOPENOWN |
1201 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1202 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1203 		goto out;
1204 	}
1205 
1206 	/*
1207 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1208 	 * or no-connection mode for those protocols that support
1209 	 * no-connection mode (the flag will be cleared later for protocols
1210 	 * that do not support no-connection mode).  This will allow a client
1211 	 * to receive replies from a different IP than the request was
1212 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1213 	 * not 0.
1214 	 */
1215 	if (nfs_ip_paranoia == 0)
1216 		args.flags |= NFSMNT_NOCONN;
1217 
1218 	if (has_nfs_args_opt != 0) {
1219 		/*
1220 		 * In the 'nfs_args' case, the pointers in the args
1221 		 * structure are in userland - we copy them in here.
1222 		 */
1223 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1224 			vfs_mount_error(mp, "Bad file handle");
1225 			error = EINVAL;
1226 			goto out;
1227 		}
1228 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1229 		    args.fhsize);
1230 		if (error != 0)
1231 			goto out;
1232 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1233 		if (error != 0)
1234 			goto out;
1235 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1236 		args.hostname = hst;
1237 		/* getsockaddr() call must be after above copyin() calls */
1238 		error = getsockaddr(&nam, (caddr_t)args.addr,
1239 		    args.addrlen);
1240 		if (error != 0)
1241 			goto out;
1242 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1243 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1244 	    sizeof(dirpath), &dirlen) == 0) {
1245 		has_nfs_from_opt = 1;
1246 		bcopy(args.hostname, hst, MNAMELEN);
1247 		hst[MNAMELEN - 1] = '\0';
1248 
1249 		/*
1250 		 * This only works with NFSv4 for now.
1251 		 */
1252 		args.fhsize = 0;
1253 		args.flags |= NFSMNT_NFSV4;
1254 		args.sotype = SOCK_STREAM;
1255 	} else {
1256 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1257 		    &args.fhsize) == 0) {
1258 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1259 				vfs_mount_error(mp, "Bad file handle");
1260 				error = EINVAL;
1261 				goto out;
1262 			}
1263 			bcopy(args.fh, nfh, args.fhsize);
1264 		} else {
1265 			args.fhsize = 0;
1266 		}
1267 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1268 		    (void **)&args.hostname, &len);
1269 		if (args.hostname == NULL) {
1270 			vfs_mount_error(mp, "Invalid hostname");
1271 			error = EINVAL;
1272 			goto out;
1273 		}
1274 		if (len >= MNAMELEN) {
1275 			vfs_mount_error(mp, "Hostname too long");
1276 			error = EINVAL;
1277 			goto out;
1278 		}
1279 		bcopy(args.hostname, hst, len);
1280 		hst[len] = '\0';
1281 	}
1282 
1283 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1284 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1285 	else {
1286 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1287 		cp = strchr(srvkrbname, ':');
1288 		if (cp != NULL)
1289 			*cp = '\0';
1290 	}
1291 	srvkrbnamelen = strlen(srvkrbname);
1292 
1293 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1294 		strlcpy(krbname, name, sizeof (krbname));
1295 	else
1296 		krbname[0] = '\0';
1297 	krbnamelen = strlen(krbname);
1298 
1299 	if (has_nfs_from_opt == 0) {
1300 		if (vfs_getopt(mp->mnt_optnew,
1301 		    "dirpath", (void **)&name, NULL) == 0)
1302 			strlcpy(dirpath, name, sizeof (dirpath));
1303 		else
1304 			dirpath[0] = '\0';
1305 		dirlen = strlen(dirpath);
1306 	}
1307 
1308 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1309 		if (vfs_getopt(mp->mnt_optnew, "addr",
1310 		    (void **)&args.addr, &args.addrlen) == 0) {
1311 			if (args.addrlen > SOCK_MAXADDRLEN) {
1312 				error = ENAMETOOLONG;
1313 				goto out;
1314 			}
1315 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1316 			bcopy(args.addr, nam, args.addrlen);
1317 			nam->sa_len = args.addrlen;
1318 		} else {
1319 			vfs_mount_error(mp, "No server address");
1320 			error = EINVAL;
1321 			goto out;
1322 		}
1323 	}
1324 
1325 	args.fh = nfh;
1326 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1327 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1328 	    nametimeo, negnametimeo, minvers);
1329 out:
1330 	if (!error) {
1331 		MNT_ILOCK(mp);
1332 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1333 		    MNTK_USES_BCACHE;
1334 		if ((VFSTONFS(mp)->nm_flag & NFSMNT_NFSV4) != 0)
1335 			mp->mnt_kern_flag |= MNTK_NULL_NOCACHE;
1336 		MNT_IUNLOCK(mp);
1337 	}
1338 	free(hst, M_TEMP);
1339 	return (error);
1340 }
1341 
1342 
1343 /*
1344  * VFS Operations.
1345  *
1346  * mount system call
1347  * It seems a bit dumb to copyinstr() the host and path here and then
1348  * bcopy() them in mountnfs(), but I wanted to detect errors before
1349  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1350  * an error after that means that I have to release the mbuf.
1351  */
1352 /* ARGSUSED */
1353 static int
1354 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1355 {
1356 	int error;
1357 	struct nfs_args args;
1358 
1359 	error = copyin(data, &args, sizeof (struct nfs_args));
1360 	if (error)
1361 		return error;
1362 
1363 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1364 
1365 	error = kernel_mount(ma, flags);
1366 	return (error);
1367 }
1368 
1369 /*
1370  * Common code for mount and mountroot
1371  */
1372 static int
1373 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
1374     char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
1375     u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
1376     struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
1377     int minvers)
1378 {
1379 	struct nfsmount *nmp;
1380 	struct nfsnode *np;
1381 	int error, trycnt, ret;
1382 	struct nfsvattr nfsva;
1383 	struct nfsclclient *clp;
1384 	struct nfsclds *dsp, *tdsp;
1385 	uint32_t lease;
1386 	static u_int64_t clval = 0;
1387 
1388 	NFSCL_DEBUG(3, "in mnt\n");
1389 	clp = NULL;
1390 	if (mp->mnt_flag & MNT_UPDATE) {
1391 		nmp = VFSTONFS(mp);
1392 		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
1393 		FREE(nam, M_SONAME);
1394 		return (0);
1395 	} else {
1396 		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
1397 		    krbnamelen + dirlen + srvkrbnamelen + 2,
1398 		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
1399 		TAILQ_INIT(&nmp->nm_bufq);
1400 		TAILQ_INIT(&nmp->nm_sess);
1401 		if (clval == 0)
1402 			clval = (u_int64_t)nfsboottime.tv_sec;
1403 		nmp->nm_clval = clval++;
1404 		nmp->nm_krbnamelen = krbnamelen;
1405 		nmp->nm_dirpathlen = dirlen;
1406 		nmp->nm_srvkrbnamelen = srvkrbnamelen;
1407 		if (td->td_ucred->cr_uid != (uid_t)0) {
1408 			/*
1409 			 * nm_uid is used to get KerberosV credentials for
1410 			 * the nfsv4 state handling operations if there is
1411 			 * no host based principal set. Use the uid of
1412 			 * this user if not root, since they are doing the
1413 			 * mount. I don't think setting this for root will
1414 			 * work, since root normally does not have user
1415 			 * credentials in a credentials cache.
1416 			 */
1417 			nmp->nm_uid = td->td_ucred->cr_uid;
1418 		} else {
1419 			/*
1420 			 * Just set to -1, so it won't be used.
1421 			 */
1422 			nmp->nm_uid = (uid_t)-1;
1423 		}
1424 
1425 		/* Copy and null terminate all the names */
1426 		if (nmp->nm_krbnamelen > 0) {
1427 			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
1428 			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
1429 		}
1430 		if (nmp->nm_dirpathlen > 0) {
1431 			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
1432 			    nmp->nm_dirpathlen);
1433 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1434 			    + 1] = '\0';
1435 		}
1436 		if (nmp->nm_srvkrbnamelen > 0) {
1437 			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
1438 			    nmp->nm_srvkrbnamelen);
1439 			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
1440 			    + nmp->nm_srvkrbnamelen + 2] = '\0';
1441 		}
1442 		nmp->nm_sockreq.nr_cred = crhold(cred);
1443 		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
1444 		mp->mnt_data = nmp;
1445 		nmp->nm_getinfo = nfs_getnlminfo;
1446 		nmp->nm_vinvalbuf = ncl_vinvalbuf;
1447 	}
1448 	vfs_getnewfsid(mp);
1449 	nmp->nm_mountp = mp;
1450 	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);
1451 
1452 	/*
1453 	 * Since nfs_decode_args() might optionally set them, these
1454 	 * need to be set to defaults before the call, so that the
1455 	 * optional settings aren't overwritten.
1456 	 */
1457 	nmp->nm_nametimeo = nametimeo;
1458 	nmp->nm_negnametimeo = negnametimeo;
1459 	nmp->nm_timeo = NFS_TIMEO;
1460 	nmp->nm_retry = NFS_RETRANS;
1461 	nmp->nm_readahead = NFS_DEFRAHEAD;
1462 
1463 	/* This is empirical approximation of sqrt(hibufspace) * 256. */
1464 	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
1465 	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
1466 		nmp->nm_wcommitsize *= 2;
1467 	nmp->nm_wcommitsize *= 256;
1468 
1469 	if ((argp->flags & NFSMNT_NFSV4) != 0)
1470 		nmp->nm_minorvers = minvers;
1471 	else
1472 		nmp->nm_minorvers = 0;
1473 
1474 	nfs_decode_args(mp, nmp, argp, hst, cred, td);
1475 
1476 	/*
1477 	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
1478 	 * high, depending on whether we end up with negative offsets in
1479 	 * the client or server somewhere.  2GB-1 may be safer.
1480 	 *
1481 	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
1482 	 * that we can handle until we find out otherwise.
1483 	 */
1484 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
1485 		nmp->nm_maxfilesize = 0xffffffffLL;
1486 	else
1487 		nmp->nm_maxfilesize = OFF_MAX;
1488 
1489 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
1490 		nmp->nm_wsize = NFS_WSIZE;
1491 		nmp->nm_rsize = NFS_RSIZE;
1492 		nmp->nm_readdirsize = NFS_READDIRSIZE;
1493 	}
1494 	nmp->nm_numgrps = NFS_MAXGRPS;
1495 	nmp->nm_tprintf_delay = nfs_tprintf_delay;
1496 	if (nmp->nm_tprintf_delay < 0)
1497 		nmp->nm_tprintf_delay = 0;
1498 	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
1499 	if (nmp->nm_tprintf_initial_delay < 0)
1500 		nmp->nm_tprintf_initial_delay = 0;
1501 	nmp->nm_fhsize = argp->fhsize;
1502 	if (nmp->nm_fhsize > 0)
1503 		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
1504 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
1505 	nmp->nm_nam = nam;
1506 	/* Set up the sockets and per-host congestion */
1507 	nmp->nm_sotype = argp->sotype;
1508 	nmp->nm_soproto = argp->proto;
1509 	nmp->nm_sockreq.nr_prog = NFS_PROG;
1510 	if ((argp->flags & NFSMNT_NFSV4))
1511 		nmp->nm_sockreq.nr_vers = NFS_VER4;
1512 	else if ((argp->flags & NFSMNT_NFSV3))
1513 		nmp->nm_sockreq.nr_vers = NFS_VER3;
1514 	else
1515 		nmp->nm_sockreq.nr_vers = NFS_VER2;
1516 
1517 
1518 	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
1519 		goto bad;
1520 	/* For NFSv4.1, get the clientid now. */
1521 	if (nmp->nm_minorvers > 0) {
1522 		NFSCL_DEBUG(3, "at getcl\n");
1523 		error = nfscl_getcl(mp, cred, td, 0, &clp);
1524 		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
1525 		if (error != 0)
1526 			goto bad;
1527 	}
1528 
1529 	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
1530 	    nmp->nm_dirpathlen > 0) {
1531 		NFSCL_DEBUG(3, "in dirp\n");
1532 		/*
1533 		 * If the fhsize on the mount point == 0 for V4, the mount
1534 		 * path needs to be looked up.
1535 		 */
1536 		trycnt = 3;
1537 		do {
1538 			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
1539 			    cred, td);
1540 			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
1541 			if (error)
1542 				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
1543 		} while (error && --trycnt > 0);
1544 		if (error) {
1545 			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
1546 			goto bad;
1547 		}
1548 	}
1549 
1550 	/*
1551 	 * A reference count is needed on the nfsnode representing the
1552 	 * remote root.  If this object is not persistent, then backward
1553 	 * traversals of the mount point (i.e. "..") will not work if
1554 	 * the nfsnode gets flushed out of the cache. Ufs does not have
1555 	 * this problem, because one can identify root inodes by their
1556 	 * number == UFS_ROOTINO (2).
1557 	 */
1558 	if (nmp->nm_fhsize > 0) {
1559 		/*
1560 		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
1561 		 * non-zero for the root vnode. f_iosize will be set correctly
1562 		 * by nfs_statfs() before any I/O occurs.
1563 		 */
1564 		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
1565 		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
1566 		    LK_EXCLUSIVE);
1567 		if (error)
1568 			goto bad;
1569 		*vpp = NFSTOV(np);
1570 
1571 		/*
1572 		 * Get file attributes and transfer parameters for the
1573 		 * mountpoint.  This has the side effect of filling in
1574 		 * (*vpp)->v_type with the correct value.
1575 		 */
1576 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
1577 		    cred, td, &nfsva, NULL, &lease);
1578 		if (ret) {
1579 			/*
1580 			 * Just set default values to get things going.
1581 			 */
1582 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
1583 			nfsva.na_vattr.va_type = VDIR;
1584 			nfsva.na_vattr.va_mode = 0777;
1585 			nfsva.na_vattr.va_nlink = 100;
1586 			nfsva.na_vattr.va_uid = (uid_t)0;
1587 			nfsva.na_vattr.va_gid = (gid_t)0;
1588 			nfsva.na_vattr.va_fileid = 2;
1589 			nfsva.na_vattr.va_gen = 1;
1590 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
1591 			nfsva.na_vattr.va_size = 512 * 1024;
1592 			lease = 60;
1593 		}
1594 		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
1595 		if (nmp->nm_minorvers > 0) {
1596 			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
1597 			NFSLOCKCLSTATE();
1598 			clp->nfsc_renew = NFSCL_RENEW(lease);
1599 			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
1600 			clp->nfsc_clientidrev++;
1601 			if (clp->nfsc_clientidrev == 0)
1602 				clp->nfsc_clientidrev++;
1603 			NFSUNLOCKCLSTATE();
1604 			/*
1605 			 * Mount will succeed, so the renew thread can be
1606 			 * started now.
1607 			 */
1608 			nfscl_start_renewthread(clp);
1609 			nfscl_clientrelease(clp);
1610 		}
1611 		if (argp->flags & NFSMNT_NFSV3)
1612 			ncl_fsinfo(nmp, *vpp, cred, td);
1613 
1614 		/* Mark if the mount point supports NFSv4 ACLs. */
1615 		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
1616 		    ret == 0 &&
1617 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
1618 			MNT_ILOCK(mp);
1619 			mp->mnt_flag |= MNT_NFS4ACLS;
1620 			MNT_IUNLOCK(mp);
1621 		}
1622 
1623 		/*
1624 		 * Lose the lock but keep the ref.
1625 		 */
1626 		NFSVOPUNLOCK(*vpp, 0);
1627 		return (0);
1628 	}
1629 	error = EIO;
1630 
1631 bad:
1632 	if (clp != NULL)
1633 		nfscl_clientrelease(clp);
1634 	newnfs_disconnect(&nmp->nm_sockreq);
1635 	crfree(nmp->nm_sockreq.nr_cred);
1636 	if (nmp->nm_sockreq.nr_auth != NULL)
1637 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1638 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1639 	mtx_destroy(&nmp->nm_mtx);
1640 	if (nmp->nm_clp != NULL) {
1641 		NFSLOCKCLSTATE();
1642 		LIST_REMOVE(nmp->nm_clp, nfsc_list);
1643 		NFSUNLOCKCLSTATE();
1644 		free(nmp->nm_clp, M_NFSCLCLIENT);
1645 	}
1646 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1647 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1648 		    dsp->nfsclds_sockp != NULL)
1649 			newnfs_disconnect(dsp->nfsclds_sockp);
1650 		nfscl_freenfsclds(dsp);
1651 	}
1652 	FREE(nmp, M_NEWNFSMNT);
1653 	FREE(nam, M_SONAME);
1654 	return (error);
1655 }
1656 
1657 /*
1658  * unmount system call
1659  */
1660 static int
1661 nfs_unmount(struct mount *mp, int mntflags)
1662 {
1663 	struct thread *td;
1664 	struct nfsmount *nmp;
1665 	int error, flags = 0, i, trycnt = 0;
1666 	struct nfsclds *dsp, *tdsp;
1667 
1668 	td = curthread;
1669 
1670 	if (mntflags & MNT_FORCE)
1671 		flags |= FORCECLOSE;
1672 	nmp = VFSTONFS(mp);
1673 	/*
1674 	 * Goes something like this..
1675 	 * - Call vflush() to clear out vnodes for this filesystem
1676 	 * - Close the socket
1677 	 * - Free up the data structures
1678 	 */
1679 	/* In the forced case, cancel any outstanding requests. */
1680 	if (mntflags & MNT_FORCE) {
1681 		error = newnfs_nmcancelreqs(nmp);
1682 		if (error)
1683 			goto out;
1684 		/* For a forced close, get rid of the renew thread now */
1685 		nfscl_umount(nmp, td);
1686 	}
1687 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1688 	do {
1689 		error = vflush(mp, 1, flags, td);
1690 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1691 			(void) nfs_catnap(PSOCK, error, "newndm");
1692 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1693 	if (error)
1694 		goto out;
1695 
1696 	/*
1697 	 * We are now committed to the unmount.
1698 	 */
1699 	if ((mntflags & MNT_FORCE) == 0)
1700 		nfscl_umount(nmp, td);
1701 	else {
1702 		mtx_lock(&nmp->nm_mtx);
1703 		nmp->nm_privflag |= NFSMNTP_FORCEDISM;
1704 		mtx_unlock(&nmp->nm_mtx);
1705 	}
1706 	/* Make sure no nfsiods are assigned to this mount. */
1707 	mtx_lock(&ncl_iod_mutex);
1708 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1709 		if (ncl_iodmount[i] == nmp) {
1710 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1711 			ncl_iodmount[i] = NULL;
1712 		}
1713 	mtx_unlock(&ncl_iod_mutex);
1714 
1715 	/*
1716 	 * We can now set mnt_data to NULL and wait for
1717 	 * nfssvc(NFSSVC_FORCEDISM) to complete.
1718 	 */
1719 	mtx_lock(&mountlist_mtx);
1720 	mtx_lock(&nmp->nm_mtx);
1721 	mp->mnt_data = NULL;
1722 	mtx_unlock(&mountlist_mtx);
1723 	while ((nmp->nm_privflag & NFSMNTP_CANCELRPCS) != 0)
1724 		msleep(nmp, &nmp->nm_mtx, PVFS, "nfsfdism", 0);
1725 	mtx_unlock(&nmp->nm_mtx);
1726 
1727 	newnfs_disconnect(&nmp->nm_sockreq);
1728 	crfree(nmp->nm_sockreq.nr_cred);
1729 	FREE(nmp->nm_nam, M_SONAME);
1730 	if (nmp->nm_sockreq.nr_auth != NULL)
1731 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1732 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1733 	mtx_destroy(&nmp->nm_mtx);
1734 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp) {
1735 		if (dsp != TAILQ_FIRST(&nmp->nm_sess) &&
1736 		    dsp->nfsclds_sockp != NULL)
1737 			newnfs_disconnect(dsp->nfsclds_sockp);
1738 		nfscl_freenfsclds(dsp);
1739 	}
1740 	FREE(nmp, M_NEWNFSMNT);
1741 out:
1742 	return (error);
1743 }
1744 
1745 /*
1746  * Return root of a filesystem
1747  */
1748 static int
1749 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1750 {
1751 	struct vnode *vp;
1752 	struct nfsmount *nmp;
1753 	struct nfsnode *np;
1754 	int error;
1755 
1756 	nmp = VFSTONFS(mp);
1757 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1758 	if (error)
1759 		return error;
1760 	vp = NFSTOV(np);
1761 	/*
1762 	 * Get transfer parameters and attributes for root vnode once.
1763 	 */
1764 	mtx_lock(&nmp->nm_mtx);
1765 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1766 		mtx_unlock(&nmp->nm_mtx);
1767 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1768 	} else
1769 		mtx_unlock(&nmp->nm_mtx);
1770 	if (vp->v_type == VNON)
1771 	    vp->v_type = VDIR;
1772 	vp->v_vflag |= VV_ROOT;
1773 	*vpp = vp;
1774 	return (0);
1775 }
1776 
1777 /*
1778  * Flush out the buffer cache
1779  */
1780 /* ARGSUSED */
1781 static int
1782 nfs_sync(struct mount *mp, int waitfor)
1783 {
1784 	struct vnode *vp, *mvp;
1785 	struct thread *td;
1786 	int error, allerror = 0;
1787 
1788 	td = curthread;
1789 
1790 	MNT_ILOCK(mp);
1791 	/*
1792 	 * If a forced dismount is in progress, return from here so that
1793 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1794 	 * calling VFS_UNMOUNT().
1795 	 */
1796 	if (NFSCL_FORCEDISM(mp)) {
1797 		MNT_IUNLOCK(mp);
1798 		return (EBADF);
1799 	}
1800 	MNT_IUNLOCK(mp);
1801 
1802 	/*
1803 	 * Force stale buffer cache information to be flushed.
1804 	 */
1805 loop:
1806 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1807 		/* XXX Racy bv_cnt check. */
1808 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1809 		    waitfor == MNT_LAZY) {
1810 			VI_UNLOCK(vp);
1811 			continue;
1812 		}
1813 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1814 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1815 			goto loop;
1816 		}
1817 		error = VOP_FSYNC(vp, waitfor, td);
1818 		if (error)
1819 			allerror = error;
1820 		NFSVOPUNLOCK(vp, 0);
1821 		vrele(vp);
1822 	}
1823 	return (allerror);
1824 }
1825 
1826 static int
1827 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1828 {
1829 	struct nfsmount *nmp = VFSTONFS(mp);
1830 	struct vfsquery vq;
1831 	int error;
1832 
1833 	bzero(&vq, sizeof(vq));
1834 	switch (op) {
1835 #if 0
1836 	case VFS_CTL_NOLOCKS:
1837 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1838  		if (req->oldptr != NULL) {
1839  			error = SYSCTL_OUT(req, &val, sizeof(val));
1840  			if (error)
1841  				return (error);
1842  		}
1843  		if (req->newptr != NULL) {
1844  			error = SYSCTL_IN(req, &val, sizeof(val));
1845  			if (error)
1846  				return (error);
1847 			if (val)
1848 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1849 			else
1850 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1851  		}
1852 		break;
1853 #endif
1854 	case VFS_CTL_QUERY:
1855 		mtx_lock(&nmp->nm_mtx);
1856 		if (nmp->nm_state & NFSSTA_TIMEO)
1857 			vq.vq_flags |= VQ_NOTRESP;
1858 		mtx_unlock(&nmp->nm_mtx);
1859 #if 0
1860 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1861 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1862 			vq.vq_flags |= VQ_NOTRESPLOCK;
1863 #endif
1864 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1865 		break;
1866  	case VFS_CTL_TIMEO:
1867  		if (req->oldptr != NULL) {
1868  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1869  			    sizeof(nmp->nm_tprintf_initial_delay));
1870  			if (error)
1871  				return (error);
1872  		}
1873  		if (req->newptr != NULL) {
1874 			error = vfs_suser(mp, req->td);
1875 			if (error)
1876 				return (error);
1877  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1878  			    sizeof(nmp->nm_tprintf_initial_delay));
1879  			if (error)
1880  				return (error);
1881  			if (nmp->nm_tprintf_initial_delay < 0)
1882  				nmp->nm_tprintf_initial_delay = 0;
1883  		}
1884 		break;
1885 	default:
1886 		return (ENOTSUP);
1887 	}
1888 	return (0);
1889 }
1890 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The result of newnfs_nmcancelreqs() is deliberately not reported; this
 * function has no return value.
 */
static void
nfs_purge(struct mount *mp)
{

	(void)newnfs_nmcancelreqs(VFSTONFS(mp));
}
1903 
1904 /*
1905  * Extract the information needed by the nlm from the nfs vnode.
1906  */
1907 static void
1908 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1909     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1910     struct timeval *timeop)
1911 {
1912 	struct nfsmount *nmp;
1913 	struct nfsnode *np = VTONFS(vp);
1914 
1915 	nmp = VFSTONFS(vp->v_mount);
1916 	if (fhlenp != NULL)
1917 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1918 	if (fhp != NULL)
1919 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1920 	if (sp != NULL)
1921 		bcopy(nmp->nm_nam, sp, min(nmp->nm_nam->sa_len, sizeof(*sp)));
1922 	if (is_v3p != NULL)
1923 		*is_v3p = NFS_ISV3(vp);
1924 	if (sizep != NULL)
1925 		*sizep = np->n_size;
1926 	if (timeop != NULL) {
1927 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1928 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1929 	}
1930 }
1931 
/*
 * Append the option name "opt" at *buf when "testval" is non-zero and
 * the name fits in the *blen bytes that remain with room left over for
 * the terminating nul.  When the name is appended, *buf and *blen are
 * moved past it; otherwise nothing is changed.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int wrote;

	if (testval == 0)
		return;
	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	wrote = snprintf(*buf, *blen, "%s", opt);
	/* The size check above means this should never trigger. */
	if (wrote != optlen)
		printf("EEK!!\n");
	*buf += wrote;
	*blen -= wrote;
}
1949 
/*
 * This function prints out an option's integer value as "opt=value".
 *
 * The text is appended at *buf, which has *blen bytes left.  When the
 * whole "opt=value" string plus its terminating nul fits, *buf and *blen
 * are advanced past it.  When it does not fit, nothing is added: the
 * buffer is re-terminated at *buf so that no partially printed (and
 * therefore wrong) number is left behind, and *buf and *blen are not
 * changed.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	/* Need room for at least the name, the '=' and one digit. */
	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len < 0 || (size_t)len >= *blen) {
		/*
		 * The output was cut short.  Drop the fragment instead of
		 * leaving something like "rsize=6553" for a value of 65536.
		 */
		**buf = '\0';
		return;
	}
	*buf += len;
	*blen -= len;
}
1967 
1968 /*
1969  * Load the option flags and values into the buffer.
1970  */
1971 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1972 {
1973 	char *buf;
1974 	size_t blen;
1975 
1976 	buf = buffer;
1977 	blen = buflen;
1978 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1979 	    &blen);
1980 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1981 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1982 		    &blen);
1983 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1984 		    &buf, &blen);
1985 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&
1986 		    nmp->nm_minorvers > 0, ",oneopenown", &buf, &blen);
1987 	}
1988 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1989 	    &blen);
1990 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1991 	    "nfsv2", &buf, &blen);
1992 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1993 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1994 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1995 	    &buf, &blen);
1996 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1997 	    &buf, &blen);
1998 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1999 	    &blen);
2000 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
2001 	    &blen);
2002 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
2003 	    &blen);
2004 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
2005 	    &blen);
2006 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
2007 	    &blen);
2008 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
2009 	    ",noncontigwr", &buf, &blen);
2010 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2011 	    0, ",lockd", &buf, &blen);
2012 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
2013 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
2014 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
2015 	    &buf, &blen);
2016 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
2017 	    &buf, &blen);
2018 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2019 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
2020 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2021 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
2022 	    &buf, &blen);
2023 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
2024 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
2025 	    &buf, &blen);
2026 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
2027 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
2028 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
2029 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
2030 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
2031 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
2032 	    &blen);
2033 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
2034 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
2035 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
2036 	    &blen);
2037 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
2038 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
2039 	    &blen);
2040 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2041 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2042 }
2043 
2044