xref: /netbsd/sys/fs/nfs/client/nfs_clvfsops.c (revision a8a5c538)
1 /*	$NetBSD: nfs_clvfsops.c,v 1.3 2018/09/03 16:29:34 riastradh Exp $	*/
2 /*-
3  * Copyright (c) 1989, 1993, 1995
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Rick Macklem at The University of Guelph.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	from nfs_vfsops.c	8.12 (Berkeley) 5/20/95
34  */
35 
36 #include <sys/cdefs.h>
37 /* __FBSDID("FreeBSD: head/sys/fs/nfsclient/nfs_clvfsops.c 304026 2016-08-12 22:44:59Z rmacklem "); */
38 __RCSID("$NetBSD: nfs_clvfsops.c,v 1.3 2018/09/03 16:29:34 riastradh Exp $");
39 
40 
41 #ifdef _KERNEL_OPT
42 #include "opt_newnfs.h"
43 #endif
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/bio.h>
49 #include <sys/buf.h>
50 #include <sys/clock.h>
51 #include <sys/jail.h>
52 #include <sys/limits.h>
53 #include <sys/lock.h>
54 #include <sys/malloc.h>
55 #include <sys/mbuf.h>
56 #include <sys/module.h>
57 #include <sys/mount.h>
58 #include <sys/proc.h>
59 #include <sys/socket.h>
60 #include <sys/socketvar.h>
61 #include <sys/sockio.h>
62 #include <sys/sysctl.h>
63 #include <sys/vnode.h>
64 #include <sys/signalvar.h>
65 
66 #include <vm/vm.h>
67 #include <vm/vm_extern.h>
68 #include <vm/uma.h>
69 
70 #include <net/if.h>
71 #include <net/route.h>
72 #include <netinet/in.h>
73 
74 #include <fs/nfs/common/nfsport.h>
75 #include <fs/nfs/client/nfsnode.h>
76 #include <fs/nfs/client/nfsmount.h>
77 #include <fs/nfs/client/nfs.h>
78 #include <fs/nfs/common/nfsdiskless.h>
79 
80 FEATURE(nfscl, "NFSv4 client");
81 
82 extern int nfscl_ticks;
83 extern struct timeval nfsboottime;
84 extern int nfsrv_useacl;
85 extern int nfscl_debuglevel;
86 extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
87 extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
88 extern struct mtx ncl_iod_mutex;
89 NFSCLSTATEMUTEX;
90 
91 MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
92 MALLOC_DEFINE(M_NEWNFSMNT, "newnfsmnt", "NFS mount struct");
93 
94 SYSCTL_DECL(_vfs_nfs);
95 static int nfs_ip_paranoia = 1;
96 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW,
97     &nfs_ip_paranoia, 0, "");
98 static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY;
99 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY,
100         downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, "");
101 /* how long between console messages "nfs server foo not responding" */
102 static int nfs_tprintf_delay = NFS_TPRINTF_DELAY;
103 SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY,
104         downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, "");
105 #ifdef NFS_DEBUG
106 int nfs_debug;
107 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0,
108     "Toggle debug flag");
109 #endif
110 
111 static int	nfs_mountroot(struct mount *);
112 static void	nfs_sec_name(char *, int *);
113 static void	nfs_decode_args(struct mount *mp, struct nfsmount *nmp,
114 		    struct nfs_args *argp, const char *, struct ucred *,
115 		    struct thread *);
116 static int	mountnfs(struct nfs_args *, struct mount *,
117 		    struct sockaddr *, char *, u_char *, int, u_char *, int,
118 		    u_char *, int, struct vnode **, struct ucred *,
119 		    struct thread *, int, int, int);
120 static void	nfs_getnlminfo(struct vnode *, uint8_t *, size_t *,
121 		    struct sockaddr_storage *, int *, off_t *,
122 		    struct timeval *);
123 static vfs_mount_t nfs_mount;
124 static vfs_cmount_t nfs_cmount;
125 static vfs_unmount_t nfs_unmount;
126 static vfs_root_t nfs_root;
127 static vfs_statfs_t nfs_statfs;
128 static vfs_sync_t nfs_sync;
129 static vfs_sysctl_t nfs_sysctl;
130 static vfs_purge_t nfs_purge;
131 
132 /*
133  * nfs vfs operations.
134  */
135 static struct vfsops nfs_vfsops = {
136 	.vfs_init =		ncl_init,
137 	.vfs_mount =		nfs_mount,
138 	.vfs_cmount =		nfs_cmount,
139 	.vfs_root =		nfs_root,
140 	.vfs_statfs =		nfs_statfs,
141 	.vfs_sync =		nfs_sync,
142 	.vfs_uninit =		ncl_uninit,
143 	.vfs_unmount =		nfs_unmount,
144 	.vfs_sysctl =		nfs_sysctl,
145 	.vfs_purge =		nfs_purge,
146 };
147 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);
148 
149 /* So that loader and kldload(2) can find us, wherever we are.. */
150 MODULE_VERSION(nfs, 1);
151 MODULE_DEPEND(nfs, nfscommon, 1, 1, 1);
152 MODULE_DEPEND(nfs, krpc, 1, 1, 1);
153 MODULE_DEPEND(nfs, nfssvc, 1, 1, 1);
154 MODULE_DEPEND(nfs, nfslock, 1, 1, 1);
155 
156 /*
157  * This structure is now defined in sys/nfs/nfs_diskless.c so that it
158  * can be shared by both NFS clients. It is declared here so that it
159  * will be defined for kernels built without NFS_ROOT, although it
160  * isn't used in that case.
161  */
162 #if !defined(NEW_NFS_BOOT)
163 struct nfs_diskless	nfs_diskless = { { { 0 } } };
164 struct nfsv3_diskless	nfsv3_diskless = { { { 0 } } };
165 int			nfs_diskless_valid = 0;
166 #endif
167 
168 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD,
169     &nfs_diskless_valid, 0,
170     "Has the diskless struct been filled correctly");
171 
172 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD,
173     nfsv3_diskless.root_hostnam, 0, "Path to nfs root");
174 
175 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD,
176     &nfsv3_diskless.root_saddr, sizeof(nfsv3_diskless.root_saddr),
177     "%Ssockaddr_in", "Diskless root nfs address");
178 
179 
180 void		newnfsargs_ntoh(struct nfs_args *);
181 static int	nfs_mountdiskless(char *,
182 		    struct sockaddr_in *, struct nfs_args *,
183 		    struct thread *, struct vnode **, struct mount *);
184 static void	nfs_convert_diskless(void);
185 static void	nfs_convert_oargs(struct nfs_args *args,
186 		    struct onfs_args *oargs);
187 
188 int
newnfs_iosize(struct nfsmount * nmp)189 newnfs_iosize(struct nfsmount *nmp)
190 {
191 	int iosize, maxio;
192 
193 	/* First, set the upper limit for iosize */
194 	if (nmp->nm_flag & NFSMNT_NFSV4) {
195 		maxio = NFS_MAXBSIZE;
196 	} else if (nmp->nm_flag & NFSMNT_NFSV3) {
197 		if (nmp->nm_sotype == SOCK_DGRAM)
198 			maxio = NFS_MAXDGRAMDATA;
199 		else
200 			maxio = NFS_MAXBSIZE;
201 	} else {
202 		maxio = NFS_V2MAXDATA;
203 	}
204 	if (nmp->nm_rsize > maxio || nmp->nm_rsize == 0)
205 		nmp->nm_rsize = maxio;
206 	if (nmp->nm_rsize > NFS_MAXBSIZE)
207 		nmp->nm_rsize = NFS_MAXBSIZE;
208 	if (nmp->nm_readdirsize > maxio || nmp->nm_readdirsize == 0)
209 		nmp->nm_readdirsize = maxio;
210 	if (nmp->nm_readdirsize > nmp->nm_rsize)
211 		nmp->nm_readdirsize = nmp->nm_rsize;
212 	if (nmp->nm_wsize > maxio || nmp->nm_wsize == 0)
213 		nmp->nm_wsize = maxio;
214 	if (nmp->nm_wsize > NFS_MAXBSIZE)
215 		nmp->nm_wsize = NFS_MAXBSIZE;
216 
217 	/*
218 	 * Calculate the size used for io buffers.  Use the larger
219 	 * of the two sizes to minimise nfs requests but make sure
220 	 * that it is at least one VM page to avoid wasting buffer
221 	 * space.  It must also be at least NFS_DIRBLKSIZ, since
222 	 * that is the buffer size used for directories.
223 	 */
224 	iosize = imax(nmp->nm_rsize, nmp->nm_wsize);
225 	iosize = imax(iosize, PAGE_SIZE);
226 	iosize = imax(iosize, NFS_DIRBLKSIZ);
227 	nmp->nm_mountp->mnt_stat.f_iosize = iosize;
228 	return (iosize);
229 }
230 
231 static void
nfs_convert_oargs(struct nfs_args * args,struct onfs_args * oargs)232 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs)
233 {
234 
235 	args->version = NFS_ARGSVERSION;
236 	args->addr = oargs->addr;
237 	args->addrlen = oargs->addrlen;
238 	args->sotype = oargs->sotype;
239 	args->proto = oargs->proto;
240 	args->fh = oargs->fh;
241 	args->fhsize = oargs->fhsize;
242 	args->flags = oargs->flags;
243 	args->wsize = oargs->wsize;
244 	args->rsize = oargs->rsize;
245 	args->readdirsize = oargs->readdirsize;
246 	args->timeo = oargs->timeo;
247 	args->retrans = oargs->retrans;
248 	args->readahead = oargs->readahead;
249 	args->hostname = oargs->hostname;
250 }
251 
252 static void
nfs_convert_diskless(void)253 nfs_convert_diskless(void)
254 {
255 
256 	bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif,
257 		sizeof(struct ifaliasreq));
258 	bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway,
259 		sizeof(struct sockaddr_in));
260 	nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args);
261 	if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) {
262 		nfsv3_diskless.root_fhsize = NFSX_MYFH;
263 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_MYFH);
264 	} else {
265 		nfsv3_diskless.root_fhsize = NFSX_V2FH;
266 		bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH);
267 	}
268 	bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr,
269 		sizeof(struct sockaddr_in));
270 	bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN);
271 	nfsv3_diskless.root_time = nfs_diskless.root_time;
272 	bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam,
273 		MAXHOSTNAMELEN);
274 	nfs_diskless_valid = 3;
275 }
276 
277 /*
278  * nfs statfs call
279  */
280 static int
nfs_statfs(struct mount * mp,struct statfs * sbp)281 nfs_statfs(struct mount *mp, struct statfs *sbp)
282 {
283 	struct vnode *vp;
284 	struct thread *td;
285 	struct nfsmount *nmp = VFSTONFS(mp);
286 	struct nfsvattr nfsva;
287 	struct nfsfsinfo fs;
288 	struct nfsstatfs sb;
289 	int error = 0, attrflag, gotfsinfo = 0, ret;
290 	struct nfsnode *np;
291 
292 	td = curthread;
293 
294 	error = vfs_busy(mp, MBF_NOWAIT);
295 	if (error)
296 		return (error);
297 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
298 	if (error) {
299 		vfs_unbusy(mp);
300 		return (error);
301 	}
302 	vp = NFSTOV(np);
303 	mtx_lock(&nmp->nm_mtx);
304 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
305 		mtx_unlock(&nmp->nm_mtx);
306 		error = nfsrpc_fsinfo(vp, &fs, td->td_ucred, td, &nfsva,
307 		    &attrflag, NULL);
308 		if (!error)
309 			gotfsinfo = 1;
310 	} else
311 		mtx_unlock(&nmp->nm_mtx);
312 	if (!error)
313 		error = nfsrpc_statfs(vp, &sb, &fs, td->td_ucred, td, &nfsva,
314 		    &attrflag, NULL);
315 	if (error != 0)
316 		NFSCL_DEBUG(2, "statfs=%d\n", error);
317 	if (attrflag == 0) {
318 		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
319 		    td->td_ucred, td, &nfsva, NULL, NULL);
320 		if (ret) {
321 			/*
322 			 * Just set default values to get things going.
323 			 */
324 			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
325 			nfsva.na_vattr.va_type = VDIR;
326 			nfsva.na_vattr.va_mode = 0777;
327 			nfsva.na_vattr.va_nlink = 100;
328 			nfsva.na_vattr.va_uid = (uid_t)0;
329 			nfsva.na_vattr.va_gid = (gid_t)0;
330 			nfsva.na_vattr.va_fileid = 2;
331 			nfsva.na_vattr.va_gen = 1;
332 			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
333 			nfsva.na_vattr.va_size = 512 * 1024;
334 		}
335 	}
336 	(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
337 	if (!error) {
338 	    mtx_lock(&nmp->nm_mtx);
339 	    if (gotfsinfo || (nmp->nm_flag & NFSMNT_NFSV4))
340 		nfscl_loadfsinfo(nmp, &fs);
341 	    nfscl_loadsbinfo(nmp, &sb, sbp);
342 	    sbp->f_iosize = newnfs_iosize(nmp);
343 	    mtx_unlock(&nmp->nm_mtx);
344 	    if (sbp != &mp->mnt_stat) {
345 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
346 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
347 	    }
348 	    strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
349 	} else if (NFS_ISV4(vp)) {
350 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
351 	}
352 	vput(vp);
353 	vfs_unbusy(mp);
354 	return (error);
355 }
356 
357 /*
358  * nfs version 3 fsinfo rpc call
359  */
360 int
ncl_fsinfo(struct nfsmount * nmp,struct vnode * vp,struct ucred * cred,struct thread * td)361 ncl_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred,
362     struct thread *td)
363 {
364 	struct nfsfsinfo fs;
365 	struct nfsvattr nfsva;
366 	int error, attrflag;
367 
368 	error = nfsrpc_fsinfo(vp, &fs, cred, td, &nfsva, &attrflag, NULL);
369 	if (!error) {
370 		if (attrflag)
371 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
372 			    1);
373 		mtx_lock(&nmp->nm_mtx);
374 		nfscl_loadfsinfo(nmp, &fs);
375 		mtx_unlock(&nmp->nm_mtx);
376 	}
377 	return (error);
378 }
379 
380 /*
381  * Mount a remote root fs via. nfs. This depends on the info in the
382  * nfs_diskless structure that has been filled in properly by some primary
383  * bootstrap.
384  * It goes something like this:
385  * - do enough of "ifconfig" by calling ifioctl() so that the system
386  *   can talk to the server
387  * - If nfs_diskless.mygateway is filled in, use that address as
388  *   a default gateway.
389  * - build the rootfs mount point and call mountnfs() to do the rest.
390  *
391  * It is assumed to be safe to read, modify, and write the nfsv3_diskless
392  * structure, as well as other global NFS client variables here, as
393  * nfs_mountroot() will be called once in the boot before any other NFS
394  * client activity occurs.
395  */
396 static int
nfs_mountroot(struct mount * mp)397 nfs_mountroot(struct mount *mp)
398 {
399 	struct thread *td = curthread;
400 	struct nfsv3_diskless *nd = &nfsv3_diskless;
401 	struct socket *so;
402 	struct vnode *vp;
403 	struct ifreq ir;
404 	int error;
405 	u_long l;
406 	char buf[128];
407 	char *cp;
408 
409 #if defined(NEW_NFS_BOOT) && defined(NEW_NFS_BOOT_BOOTP)
410 	bootpc_init();		/* use bootp to get nfs_diskless filled in */
411 #elif defined(NEW_NFS_BOOT)
412 	nfs_setup_diskless();
413 #endif
414 
415 	if (nfs_diskless_valid == 0)
416 		return (-1);
417 	if (nfs_diskless_valid == 1)
418 		nfs_convert_diskless();
419 
420 	/*
421 	 * XXX splnet, so networks will receive...
422 	 */
423 	splnet();
424 
425 	/*
426 	 * Do enough of ifconfig(8) so that the critical net interface can
427 	 * talk to the server.
428 	 */
429 	error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0,
430 	    td->td_ucred, td);
431 	if (error)
432 		panic("nfs_mountroot: socreate(%04x): %d",
433 			nd->myif.ifra_addr.sa_family, error);
434 
435 #if 0 /* XXX Bad idea */
436 	/*
437 	 * We might not have been told the right interface, so we pass
438 	 * over the first ten interfaces of the same kind, until we get
439 	 * one of them configured.
440 	 */
441 
442 	for (i = strlen(nd->myif.ifra_name) - 1;
443 		nd->myif.ifra_name[i] >= '0' &&
444 		nd->myif.ifra_name[i] <= '9';
445 		nd->myif.ifra_name[i] ++) {
446 		error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
447 		if(!error)
448 			break;
449 	}
450 #endif
451 	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td);
452 	if (error)
453 		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
454 	if ((cp = kern_getenv("boot.netif.mtu")) != NULL) {
455 		ir.ifr_mtu = strtol(cp, NULL, 10);
456 		bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ);
457 		freeenv(cp);
458 		error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td);
459 		if (error)
460 			printf("nfs_mountroot: SIOCSIFMTU: %d", error);
461 	}
462 	soclose(so);
463 
464 	/*
465 	 * If the gateway field is filled in, set it as the default route.
466 	 * Note that pxeboot will set a default route of 0 if the route
467 	 * is not set by the DHCP server.  Check also for a value of 0
468 	 * to avoid panicking inappropriately in that situation.
469 	 */
470 	if (nd->mygateway.sin_len != 0 &&
471 	    nd->mygateway.sin_addr.s_addr != 0) {
472 		struct sockaddr_in mask, sin;
473 
474 		bzero((caddr_t)&mask, sizeof(mask));
475 		sin = mask;
476 		sin.sin_family = AF_INET;
477 		sin.sin_len = sizeof(sin);
478                 /* XXX MRT use table 0 for this sort of thing */
479 		CURVNET_SET(TD_TO_VNET(td));
480 		error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
481 		    (struct sockaddr *)&nd->mygateway,
482 		    (struct sockaddr *)&mask,
483 		    RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
484 		CURVNET_RESTORE();
485 		if (error)
486 			panic("nfs_mountroot: RTM_ADD: %d", error);
487 	}
488 
489 	/*
490 	 * Create the rootfs mount point.
491 	 */
492 	nd->root_args.fh = nd->root_fh;
493 	nd->root_args.fhsize = nd->root_fhsize;
494 	l = ntohl(nd->root_saddr.sin_addr.s_addr);
495 	snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s",
496 		(l >> 24) & 0xff, (l >> 16) & 0xff,
497 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
498 	printf("NFS ROOT: %s\n", buf);
499 	nd->root_args.hostname = buf;
500 	if ((error = nfs_mountdiskless(buf,
501 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
502 		return (error);
503 	}
504 
505 	/*
506 	 * This is not really an nfs issue, but it is much easier to
507 	 * set hostname here and then let the "/etc/rc.xxx" files
508 	 * mount the right /var based upon its preset value.
509 	 */
510 	mtx_lock(&prison0.pr_mtx);
511 	strlcpy(prison0.pr_hostname, nd->my_hostnam,
512 	    sizeof(prison0.pr_hostname));
513 	mtx_unlock(&prison0.pr_mtx);
514 	inittodr(ntohl(nd->root_time));
515 	return (0);
516 }
517 
518 /*
519  * Internal version of mount system call for diskless setup.
520  */
521 static int
nfs_mountdiskless(char * path,struct sockaddr_in * sin,struct nfs_args * args,struct thread * td,struct vnode ** vpp,struct mount * mp)522 nfs_mountdiskless(char *path,
523     struct sockaddr_in *sin, struct nfs_args *args, struct thread *td,
524     struct vnode **vpp, struct mount *mp)
525 {
526 	struct sockaddr *nam;
527 	int dirlen, error;
528 	char *dirpath;
529 
530 	/*
531 	 * Find the directory path in "path", which also has the server's
532 	 * name/ip address in it.
533 	 */
534 	dirpath = strchr(path, ':');
535 	if (dirpath != NULL)
536 		dirlen = strlen(++dirpath);
537 	else
538 		dirlen = 0;
539 	nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK);
540 	if ((error = mountnfs(args, mp, nam, path, NULL, 0, dirpath, dirlen,
541 	    NULL, 0, vpp, td->td_ucred, td, NFS_DEFAULT_NAMETIMEO,
542 	    NFS_DEFAULT_NEGNAMETIMEO, 0)) != 0) {
543 		printf("nfs_mountroot: mount %s on /: %d\n", path, error);
544 		return (error);
545 	}
546 	return (0);
547 }
548 
549 static void
nfs_sec_name(char * sec,int * flagsp)550 nfs_sec_name(char *sec, int *flagsp)
551 {
552 	if (!strcmp(sec, "krb5"))
553 		*flagsp |= NFSMNT_KERB;
554 	else if (!strcmp(sec, "krb5i"))
555 		*flagsp |= (NFSMNT_KERB | NFSMNT_INTEGRITY);
556 	else if (!strcmp(sec, "krb5p"))
557 		*flagsp |= (NFSMNT_KERB | NFSMNT_PRIVACY);
558 }
559 
560 static void
nfs_decode_args(struct mount * mp,struct nfsmount * nmp,struct nfs_args * argp,const char * hostname,struct ucred * cred,struct thread * td)561 nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp,
562     const char *hostname, struct ucred *cred, struct thread *td)
563 {
564 	int s;
565 	int adjsock;
566 	char *p;
567 
568 	s = splnet();
569 
570 	/*
571 	 * Set read-only flag if requested; otherwise, clear it if this is
572 	 * an update.  If this is not an update, then either the read-only
573 	 * flag is already clear, or this is a root mount and it was set
574 	 * intentionally at some previous point.
575 	 */
576 	if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) {
577 		MNT_ILOCK(mp);
578 		mp->mnt_flag |= MNT_RDONLY;
579 		MNT_IUNLOCK(mp);
580 	} else if (mp->mnt_flag & MNT_UPDATE) {
581 		MNT_ILOCK(mp);
582 		mp->mnt_flag &= ~MNT_RDONLY;
583 		MNT_IUNLOCK(mp);
584 	}
585 
586 	/*
587 	 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes
588 	 * no sense in that context.  Also, set up appropriate retransmit
589 	 * and soft timeout behavior.
590 	 */
591 	if (argp->sotype == SOCK_STREAM) {
592 		nmp->nm_flag &= ~NFSMNT_NOCONN;
593 		nmp->nm_timeo = NFS_MAXTIMEO;
594 		if ((argp->flags & NFSMNT_NFSV4) != 0)
595 			nmp->nm_retry = INT_MAX;
596 		else
597 			nmp->nm_retry = NFS_RETRANS_TCP;
598 	}
599 
600 	/* Also clear RDIRPLUS if NFSv2, it crashes some servers */
601 	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
602 		argp->flags &= ~NFSMNT_RDIRPLUS;
603 		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
604 	}
605 
606 	/* Re-bind if rsrvd port requested and wasn't on one */
607 	adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT)
608 		  && (argp->flags & NFSMNT_RESVPORT);
609 	/* Also re-bind if we're switching to/from a connected UDP socket */
610 	adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) !=
611 		    (argp->flags & NFSMNT_NOCONN));
612 
613 	/* Update flags atomically.  Don't change the lock bits. */
614 	nmp->nm_flag = argp->flags | nmp->nm_flag;
615 	splx(s);
616 
617 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
618 		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
619 		if (nmp->nm_timeo < NFS_MINTIMEO)
620 			nmp->nm_timeo = NFS_MINTIMEO;
621 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
622 			nmp->nm_timeo = NFS_MAXTIMEO;
623 	}
624 
625 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
626 		nmp->nm_retry = argp->retrans;
627 		if (nmp->nm_retry > NFS_MAXREXMIT)
628 			nmp->nm_retry = NFS_MAXREXMIT;
629 	}
630 
631 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
632 		nmp->nm_wsize = argp->wsize;
633 		/*
634 		 * Clip at the power of 2 below the size. There is an
635 		 * issue (not isolated) that causes intermittent page
636 		 * faults if this is not done.
637 		 */
638 		if (nmp->nm_wsize > NFS_FABLKSIZE)
639 			nmp->nm_wsize = 1 << (fls(nmp->nm_wsize) - 1);
640 		else
641 			nmp->nm_wsize = NFS_FABLKSIZE;
642 	}
643 
644 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
645 		nmp->nm_rsize = argp->rsize;
646 		/*
647 		 * Clip at the power of 2 below the size. There is an
648 		 * issue (not isolated) that causes intermittent page
649 		 * faults if this is not done.
650 		 */
651 		if (nmp->nm_rsize > NFS_FABLKSIZE)
652 			nmp->nm_rsize = 1 << (fls(nmp->nm_rsize) - 1);
653 		else
654 			nmp->nm_rsize = NFS_FABLKSIZE;
655 	}
656 
657 	if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) {
658 		nmp->nm_readdirsize = argp->readdirsize;
659 	}
660 
661 	if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0)
662 		nmp->nm_acregmin = argp->acregmin;
663 	else
664 		nmp->nm_acregmin = NFS_MINATTRTIMO;
665 	if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0)
666 		nmp->nm_acregmax = argp->acregmax;
667 	else
668 		nmp->nm_acregmax = NFS_MAXATTRTIMO;
669 	if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0)
670 		nmp->nm_acdirmin = argp->acdirmin;
671 	else
672 		nmp->nm_acdirmin = NFS_MINDIRATTRTIMO;
673 	if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0)
674 		nmp->nm_acdirmax = argp->acdirmax;
675 	else
676 		nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO;
677 	if (nmp->nm_acdirmin > nmp->nm_acdirmax)
678 		nmp->nm_acdirmin = nmp->nm_acdirmax;
679 	if (nmp->nm_acregmin > nmp->nm_acregmax)
680 		nmp->nm_acregmin = nmp->nm_acregmax;
681 
682 	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
683 		if (argp->readahead <= NFS_MAXRAHEAD)
684 			nmp->nm_readahead = argp->readahead;
685 		else
686 			nmp->nm_readahead = NFS_MAXRAHEAD;
687 	}
688 	if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
689 		if (argp->wcommitsize < nmp->nm_wsize)
690 			nmp->nm_wcommitsize = nmp->nm_wsize;
691 		else
692 			nmp->nm_wcommitsize = argp->wcommitsize;
693 	}
694 
695 	adjsock |= ((nmp->nm_sotype != argp->sotype) ||
696 		    (nmp->nm_soproto != argp->proto));
697 
698 	if (nmp->nm_client != NULL && adjsock) {
699 		int haslock = 0, error = 0;
700 
701 		if (nmp->nm_sotype == SOCK_STREAM) {
702 			error = newnfs_sndlock(&nmp->nm_sockreq.nr_lock);
703 			if (!error)
704 				haslock = 1;
705 		}
706 		if (!error) {
707 		    newnfs_disconnect(&nmp->nm_sockreq);
708 		    if (haslock)
709 			newnfs_sndunlock(&nmp->nm_sockreq.nr_lock);
710 		    nmp->nm_sotype = argp->sotype;
711 		    nmp->nm_soproto = argp->proto;
712 		    if (nmp->nm_sotype == SOCK_DGRAM)
713 			while (newnfs_connect(nmp, &nmp->nm_sockreq,
714 			    cred, td, 0)) {
715 				printf("newnfs_args: retrying connect\n");
716 				(void) nfs_catnap(PSOCK, 0, "nfscon");
717 			}
718 		}
719 	} else {
720 		nmp->nm_sotype = argp->sotype;
721 		nmp->nm_soproto = argp->proto;
722 	}
723 
724 	if (hostname != NULL) {
725 		strlcpy(nmp->nm_hostname, hostname,
726 		    sizeof(nmp->nm_hostname));
727 		p = strchr(nmp->nm_hostname, ':');
728 		if (p != NULL)
729 			*p = '\0';
730 	}
731 }
732 
/*
 * NULL-terminated list of mount option names passed to vfs_filteropt()
 * by nfs_mount().
 */
static const char *nfs_opts[] = {
	"from", "nfs_args", "noac", "noatime",
	"noexec", "suiddir", "nosuid", "nosymfollow",
	"union", "noclusterr", "noclusterw", "multilabel",
	"acls", "force", "update", "async",
	"noconn", "nolockd", "conn", "lockd",
	"intr", "rdirplus", "readdirsize", "soft",
	"hard", "mntudp", "tcp", "udp",
	"wsize", "rsize", "retrans", "actimeo",
	"acregmin", "acregmax", "acdirmin", "acdirmax",
	"resvport", "readahead", "hostname", "timeo",
	"timeout", "addr", "fh", "nfsv3",
	"sec", "principal", "nfsv4", "gssname",
	"allgssname", "dirpath", "minorversion", "nametimeo",
	"negnametimeo", "nocto", "noncontigwr", "pnfs",
	"wcommitsize",
	NULL
};
744 
745 /*
746  * Parse the "from" mountarg, passed by the generic mount(8) program
747  * or the mountroot code.  This is used when rerooting into NFS.
748  *
749  * Note that the "hostname" is actually a "hostname:/share/path" string.
750  */
751 static int
nfs_mount_parse_from(struct vfsoptlist * opts,char ** hostnamep,struct sockaddr_in ** sinp,char * dirpath,size_t dirpathsize,int * dirlenp)752 nfs_mount_parse_from(struct vfsoptlist *opts, char **hostnamep,
753     struct sockaddr_in **sinp, char *dirpath, size_t dirpathsize, int *dirlenp)
754 {
755 	char nam[MNAMELEN + 1];
756 	char *delimp, *hostp, *spec;
757 	int error, have_bracket = 0, offset, rv, speclen;
758 	struct sockaddr_in *sin;
759 	size_t len;
760 
761 	error = vfs_getopt(opts, "from", (void **)&spec, &speclen);
762 	if (error != 0)
763 		return (error);
764 
765 	/*
766 	 * This part comes from sbin/mount_nfs/mount_nfs.c:getnfsargs().
767 	 */
768 	if (*spec == '[' && (delimp = strchr(spec + 1, ']')) != NULL &&
769 	    *(delimp + 1) == ':') {
770 		hostp = spec + 1;
771 		spec = delimp + 2;
772 		have_bracket = 1;
773 	} else if ((delimp = strrchr(spec, ':')) != NULL) {
774 		hostp = spec;
775 		spec = delimp + 1;
776 	} else if ((delimp = strrchr(spec, '@')) != NULL) {
777 		printf("%s: path@server syntax is deprecated, "
778 		    "use server:path\n", __func__);
779 		hostp = delimp + 1;
780 	} else {
781 		printf("%s: no <host>:<dirpath> nfs-name\n", __func__);
782 		return (EINVAL);
783 	}
784 	*delimp = '\0';
785 
786 	/*
787 	 * If there has been a trailing slash at mounttime it seems
788 	 * that some mountd implementations fail to remove the mount
789 	 * entries from their mountlist while unmounting.
790 	 */
791 	for (speclen = strlen(spec);
792 	    speclen > 1 && spec[speclen - 1] == '/';
793 	    speclen--)
794 		spec[speclen - 1] = '\0';
795 	if (strlen(hostp) + strlen(spec) + 1 > MNAMELEN) {
796 		printf("%s: %s:%s: name too long", __func__, hostp, spec);
797 		return (EINVAL);
798 	}
799 	/* Make both '@' and ':' notations equal */
800 	if (*hostp != '\0') {
801 		len = strlen(hostp);
802 		offset = 0;
803 		if (have_bracket)
804 			nam[offset++] = '[';
805 		memmove(nam + offset, hostp, len);
806 		if (have_bracket)
807 			nam[len + offset++] = ']';
808 		nam[len + offset++] = ':';
809 		memmove(nam + len + offset, spec, speclen);
810 		nam[len + speclen + offset] = '\0';
811 	} else
812 		nam[0] = '\0';
813 
814 	/*
815 	 * XXX: IPv6
816 	 */
817 	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK);
818 	rv = inet_pton(AF_INET, hostp, &sin->sin_addr);
819 	if (rv != 1) {
820 		printf("%s: cannot parse '%s', inet_pton() returned %d\n",
821 		    __func__, hostp, rv);
822 		free(sin, M_SONAME);
823 		return (EINVAL);
824 	}
825 
826 	sin->sin_len = sizeof(*sin);
827 	sin->sin_family = AF_INET;
828 	/*
829 	 * XXX: hardcoded port number.
830 	 */
831 	sin->sin_port = htons(2049);
832 
833 	*hostnamep = strdup(nam, M_NEWNFSMNT);
834 	*sinp = sin;
835 	strlcpy(dirpath, spec, dirpathsize);
836 	*dirlenp = strlen(dirpath);
837 
838 	return (0);
839 }
840 
841 /*
842  * VFS Operations.
843  *
844  * mount system call
845  * It seems a bit dumb to copyinstr() the host and path here and then
846  * bcopy() them in mountnfs(), but I wanted to detect errors before
847  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
848  * an error after that means that I have to release the mbuf.
849  */
850 /* ARGSUSED */
851 static int
nfs_mount(struct mount * mp)852 nfs_mount(struct mount *mp)
853 {
854 	struct nfs_args args = {
855 	    .version = NFS_ARGSVERSION,
856 	    .addr = NULL,
857 	    .addrlen = sizeof (struct sockaddr_in),
858 	    .sotype = SOCK_STREAM,
859 	    .proto = 0,
860 	    .fh = NULL,
861 	    .fhsize = 0,
862 	    .flags = NFSMNT_RESVPORT,
863 	    .wsize = NFS_WSIZE,
864 	    .rsize = NFS_RSIZE,
865 	    .readdirsize = NFS_READDIRSIZE,
866 	    .timeo = 10,
867 	    .retrans = NFS_RETRANS,
868 	    .readahead = NFS_DEFRAHEAD,
869 	    .wcommitsize = 0,			/* was: NQ_DEFLEASE */
870 	    .hostname = NULL,
871 	    .acregmin = NFS_MINATTRTIMO,
872 	    .acregmax = NFS_MAXATTRTIMO,
873 	    .acdirmin = NFS_MINDIRATTRTIMO,
874 	    .acdirmax = NFS_MAXDIRATTRTIMO,
875 	};
876 	int error = 0, ret, len;
877 	struct sockaddr *nam = NULL;
878 	struct vnode *vp;
879 	struct thread *td;
880 	char hst[MNAMELEN];
881 	u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100];
882 	char *cp, *opt, *name, *secname;
883 	int nametimeo = NFS_DEFAULT_NAMETIMEO;
884 	int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO;
885 	int minvers = 0;
886 	int dirlen, has_nfs_args_opt, has_nfs_from_opt,
887 	    krbnamelen, srvkrbnamelen;
888 	size_t hstlen;
889 
890 	has_nfs_args_opt = 0;
891 	has_nfs_from_opt = 0;
892 	if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) {
893 		error = EINVAL;
894 		goto out;
895 	}
896 
897 	td = curthread;
898 	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS &&
899 	    nfs_diskless_valid != 0) {
900 		error = nfs_mountroot(mp);
901 		goto out;
902 	}
903 
904 	nfscl_init();
905 
906 	/*
907 	 * The old mount_nfs program passed the struct nfs_args
908 	 * from userspace to kernel.  The new mount_nfs program
909 	 * passes string options via nmount() from userspace to kernel
910 	 * and we populate the struct nfs_args in the kernel.
911 	 */
912 	if (vfs_getopt(mp->mnt_optnew, "nfs_args", NULL, NULL) == 0) {
913 		error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args,
914 		    sizeof(args));
915 		if (error != 0)
916 			goto out;
917 
918 		if (args.version != NFS_ARGSVERSION) {
919 			error = EPROGMISMATCH;
920 			goto out;
921 		}
922 		has_nfs_args_opt = 1;
923 	}
924 
925 	/* Handle the new style options. */
926 	if (vfs_getopt(mp->mnt_optnew, "noac", NULL, NULL) == 0) {
927 		args.acdirmin = args.acdirmax =
928 		    args.acregmin = args.acregmax = 0;
929 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
930 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
931 	}
932 	if (vfs_getopt(mp->mnt_optnew, "noconn", NULL, NULL) == 0)
933 		args.flags |= NFSMNT_NOCONN;
934 	if (vfs_getopt(mp->mnt_optnew, "conn", NULL, NULL) == 0)
935 		args.flags &= ~NFSMNT_NOCONN;
936 	if (vfs_getopt(mp->mnt_optnew, "nolockd", NULL, NULL) == 0)
937 		args.flags |= NFSMNT_NOLOCKD;
938 	if (vfs_getopt(mp->mnt_optnew, "lockd", NULL, NULL) == 0)
939 		args.flags &= ~NFSMNT_NOLOCKD;
940 	if (vfs_getopt(mp->mnt_optnew, "intr", NULL, NULL) == 0)
941 		args.flags |= NFSMNT_INT;
942 	if (vfs_getopt(mp->mnt_optnew, "rdirplus", NULL, NULL) == 0)
943 		args.flags |= NFSMNT_RDIRPLUS;
944 	if (vfs_getopt(mp->mnt_optnew, "resvport", NULL, NULL) == 0)
945 		args.flags |= NFSMNT_RESVPORT;
946 	if (vfs_getopt(mp->mnt_optnew, "noresvport", NULL, NULL) == 0)
947 		args.flags &= ~NFSMNT_RESVPORT;
948 	if (vfs_getopt(mp->mnt_optnew, "soft", NULL, NULL) == 0)
949 		args.flags |= NFSMNT_SOFT;
950 	if (vfs_getopt(mp->mnt_optnew, "hard", NULL, NULL) == 0)
951 		args.flags &= ~NFSMNT_SOFT;
952 	if (vfs_getopt(mp->mnt_optnew, "mntudp", NULL, NULL) == 0)
953 		args.sotype = SOCK_DGRAM;
954 	if (vfs_getopt(mp->mnt_optnew, "udp", NULL, NULL) == 0)
955 		args.sotype = SOCK_DGRAM;
956 	if (vfs_getopt(mp->mnt_optnew, "tcp", NULL, NULL) == 0)
957 		args.sotype = SOCK_STREAM;
958 	if (vfs_getopt(mp->mnt_optnew, "nfsv3", NULL, NULL) == 0)
959 		args.flags |= NFSMNT_NFSV3;
960 	if (vfs_getopt(mp->mnt_optnew, "nfsv4", NULL, NULL) == 0) {
961 		args.flags |= NFSMNT_NFSV4;
962 		args.sotype = SOCK_STREAM;
963 	}
964 	if (vfs_getopt(mp->mnt_optnew, "allgssname", NULL, NULL) == 0)
965 		args.flags |= NFSMNT_ALLGSSNAME;
966 	if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
967 		args.flags |= NFSMNT_NOCTO;
968 	if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
969 		args.flags |= NFSMNT_NONCONTIGWR;
970 	if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
971 		args.flags |= NFSMNT_PNFS;
972 	if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
973 		if (opt == NULL) {
974 			vfs_mount_error(mp, "illegal readdirsize");
975 			error = EINVAL;
976 			goto out;
977 		}
978 		ret = sscanf(opt, "%d", &args.readdirsize);
979 		if (ret != 1 || args.readdirsize <= 0) {
980 			vfs_mount_error(mp, "illegal readdirsize: %s",
981 			    opt);
982 			error = EINVAL;
983 			goto out;
984 		}
985 		args.flags |= NFSMNT_READDIRSIZE;
986 	}
987 	if (vfs_getopt(mp->mnt_optnew, "readahead", (void **)&opt, NULL) == 0) {
988 		if (opt == NULL) {
989 			vfs_mount_error(mp, "illegal readahead");
990 			error = EINVAL;
991 			goto out;
992 		}
993 		ret = sscanf(opt, "%d", &args.readahead);
994 		if (ret != 1 || args.readahead <= 0) {
995 			vfs_mount_error(mp, "illegal readahead: %s",
996 			    opt);
997 			error = EINVAL;
998 			goto out;
999 		}
1000 		args.flags |= NFSMNT_READAHEAD;
1001 	}
1002 	if (vfs_getopt(mp->mnt_optnew, "wsize", (void **)&opt, NULL) == 0) {
1003 		if (opt == NULL) {
1004 			vfs_mount_error(mp, "illegal wsize");
1005 			error = EINVAL;
1006 			goto out;
1007 		}
1008 		ret = sscanf(opt, "%d", &args.wsize);
1009 		if (ret != 1 || args.wsize <= 0) {
1010 			vfs_mount_error(mp, "illegal wsize: %s",
1011 			    opt);
1012 			error = EINVAL;
1013 			goto out;
1014 		}
1015 		args.flags |= NFSMNT_WSIZE;
1016 	}
1017 	if (vfs_getopt(mp->mnt_optnew, "rsize", (void **)&opt, NULL) == 0) {
1018 		if (opt == NULL) {
1019 			vfs_mount_error(mp, "illegal rsize");
1020 			error = EINVAL;
1021 			goto out;
1022 		}
1023 		ret = sscanf(opt, "%d", &args.rsize);
1024 		if (ret != 1 || args.rsize <= 0) {
1025 			vfs_mount_error(mp, "illegal wsize: %s",
1026 			    opt);
1027 			error = EINVAL;
1028 			goto out;
1029 		}
1030 		args.flags |= NFSMNT_RSIZE;
1031 	}
1032 	if (vfs_getopt(mp->mnt_optnew, "retrans", (void **)&opt, NULL) == 0) {
1033 		if (opt == NULL) {
1034 			vfs_mount_error(mp, "illegal retrans");
1035 			error = EINVAL;
1036 			goto out;
1037 		}
1038 		ret = sscanf(opt, "%d", &args.retrans);
1039 		if (ret != 1 || args.retrans <= 0) {
1040 			vfs_mount_error(mp, "illegal retrans: %s",
1041 			    opt);
1042 			error = EINVAL;
1043 			goto out;
1044 		}
1045 		args.flags |= NFSMNT_RETRANS;
1046 	}
1047 	if (vfs_getopt(mp->mnt_optnew, "actimeo", (void **)&opt, NULL) == 0) {
1048 		ret = sscanf(opt, "%d", &args.acregmin);
1049 		if (ret != 1 || args.acregmin < 0) {
1050 			vfs_mount_error(mp, "illegal actimeo: %s",
1051 			    opt);
1052 			error = EINVAL;
1053 			goto out;
1054 		}
1055 		args.acdirmin = args.acdirmax = args.acregmax = args.acregmin;
1056 		args.flags |= NFSMNT_ACDIRMIN | NFSMNT_ACDIRMAX |
1057 		    NFSMNT_ACREGMIN | NFSMNT_ACREGMAX;
1058 	}
1059 	if (vfs_getopt(mp->mnt_optnew, "acregmin", (void **)&opt, NULL) == 0) {
1060 		ret = sscanf(opt, "%d", &args.acregmin);
1061 		if (ret != 1 || args.acregmin < 0) {
1062 			vfs_mount_error(mp, "illegal acregmin: %s",
1063 			    opt);
1064 			error = EINVAL;
1065 			goto out;
1066 		}
1067 		args.flags |= NFSMNT_ACREGMIN;
1068 	}
1069 	if (vfs_getopt(mp->mnt_optnew, "acregmax", (void **)&opt, NULL) == 0) {
1070 		ret = sscanf(opt, "%d", &args.acregmax);
1071 		if (ret != 1 || args.acregmax < 0) {
1072 			vfs_mount_error(mp, "illegal acregmax: %s",
1073 			    opt);
1074 			error = EINVAL;
1075 			goto out;
1076 		}
1077 		args.flags |= NFSMNT_ACREGMAX;
1078 	}
1079 	if (vfs_getopt(mp->mnt_optnew, "acdirmin", (void **)&opt, NULL) == 0) {
1080 		ret = sscanf(opt, "%d", &args.acdirmin);
1081 		if (ret != 1 || args.acdirmin < 0) {
1082 			vfs_mount_error(mp, "illegal acdirmin: %s",
1083 			    opt);
1084 			error = EINVAL;
1085 			goto out;
1086 		}
1087 		args.flags |= NFSMNT_ACDIRMIN;
1088 	}
1089 	if (vfs_getopt(mp->mnt_optnew, "acdirmax", (void **)&opt, NULL) == 0) {
1090 		ret = sscanf(opt, "%d", &args.acdirmax);
1091 		if (ret != 1 || args.acdirmax < 0) {
1092 			vfs_mount_error(mp, "illegal acdirmax: %s",
1093 			    opt);
1094 			error = EINVAL;
1095 			goto out;
1096 		}
1097 		args.flags |= NFSMNT_ACDIRMAX;
1098 	}
1099 	if (vfs_getopt(mp->mnt_optnew, "wcommitsize", (void **)&opt, NULL) == 0) {
1100 		ret = sscanf(opt, "%d", &args.wcommitsize);
1101 		if (ret != 1 || args.wcommitsize < 0) {
1102 			vfs_mount_error(mp, "illegal wcommitsize: %s", opt);
1103 			error = EINVAL;
1104 			goto out;
1105 		}
1106 		args.flags |= NFSMNT_WCOMMITSIZE;
1107 	}
1108 	if (vfs_getopt(mp->mnt_optnew, "timeo", (void **)&opt, NULL) == 0) {
1109 		ret = sscanf(opt, "%d", &args.timeo);
1110 		if (ret != 1 || args.timeo <= 0) {
1111 			vfs_mount_error(mp, "illegal timeo: %s",
1112 			    opt);
1113 			error = EINVAL;
1114 			goto out;
1115 		}
1116 		args.flags |= NFSMNT_TIMEO;
1117 	}
1118 	if (vfs_getopt(mp->mnt_optnew, "timeout", (void **)&opt, NULL) == 0) {
1119 		ret = sscanf(opt, "%d", &args.timeo);
1120 		if (ret != 1 || args.timeo <= 0) {
1121 			vfs_mount_error(mp, "illegal timeout: %s",
1122 			    opt);
1123 			error = EINVAL;
1124 			goto out;
1125 		}
1126 		args.flags |= NFSMNT_TIMEO;
1127 	}
1128 	if (vfs_getopt(mp->mnt_optnew, "nametimeo", (void **)&opt, NULL) == 0) {
1129 		ret = sscanf(opt, "%d", &nametimeo);
1130 		if (ret != 1 || nametimeo < 0) {
1131 			vfs_mount_error(mp, "illegal nametimeo: %s", opt);
1132 			error = EINVAL;
1133 			goto out;
1134 		}
1135 	}
1136 	if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL)
1137 	    == 0) {
1138 		ret = sscanf(opt, "%d", &negnametimeo);
1139 		if (ret != 1 || negnametimeo < 0) {
1140 			vfs_mount_error(mp, "illegal negnametimeo: %s",
1141 			    opt);
1142 			error = EINVAL;
1143 			goto out;
1144 		}
1145 	}
1146 	if (vfs_getopt(mp->mnt_optnew, "minorversion", (void **)&opt, NULL) ==
1147 	    0) {
1148 		ret = sscanf(opt, "%d", &minvers);
1149 		if (ret != 1 || minvers < 0 || minvers > 1 ||
1150 		    (args.flags & NFSMNT_NFSV4) == 0) {
1151 			vfs_mount_error(mp, "illegal minorversion: %s", opt);
1152 			error = EINVAL;
1153 			goto out;
1154 		}
1155 	}
1156 	if (vfs_getopt(mp->mnt_optnew, "sec",
1157 		(void **) &secname, NULL) == 0)
1158 		nfs_sec_name(secname, &args.flags);
1159 
1160 	if (mp->mnt_flag & MNT_UPDATE) {
1161 		struct nfsmount *nmp = VFSTONFS(mp);
1162 
1163 		if (nmp == NULL) {
1164 			error = EIO;
1165 			goto out;
1166 		}
1167 
1168 		/*
1169 		 * If a change from TCP->UDP is done and there are thread(s)
1170 		 * that have I/O RPC(s) in progress with a transfer size
1171 		 * greater than NFS_MAXDGRAMDATA, those thread(s) will be
1172 		 * hung, retrying the RPC(s) forever. Usually these threads
1173 		 * will be seen doing an uninterruptible sleep on wait channel
1174 		 * "nfsreq".
1175 		 */
1176 		if (args.sotype == SOCK_DGRAM && nmp->nm_sotype == SOCK_STREAM)
1177 			tprintf(td->td_proc, LOG_WARNING,
1178 	"Warning: mount -u that changes TCP->UDP can result in hung threads\n");
1179 
1180 		/*
1181 		 * When doing an update, we can't change version,
1182 		 * security, switch lockd strategies or change cookie
1183 		 * translation
1184 		 */
1185 		args.flags = (args.flags &
1186 		    ~(NFSMNT_NFSV3 |
1187 		      NFSMNT_NFSV4 |
1188 		      NFSMNT_KERB |
1189 		      NFSMNT_INTEGRITY |
1190 		      NFSMNT_PRIVACY |
1191 		      NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) |
1192 		    (nmp->nm_flag &
1193 			(NFSMNT_NFSV3 |
1194 			 NFSMNT_NFSV4 |
1195 			 NFSMNT_KERB |
1196 			 NFSMNT_INTEGRITY |
1197 			 NFSMNT_PRIVACY |
1198 			 NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/));
1199 		nfs_decode_args(mp, nmp, &args, NULL, td->td_ucred, td);
1200 		goto out;
1201 	}
1202 
1203 	/*
1204 	 * Make the nfs_ip_paranoia sysctl serve as the default connection
1205 	 * or no-connection mode for those protocols that support
1206 	 * no-connection mode (the flag will be cleared later for protocols
1207 	 * that do not support no-connection mode).  This will allow a client
1208 	 * to receive replies from a different IP then the request was
1209 	 * sent to.  Note: default value for nfs_ip_paranoia is 1 (paranoid),
1210 	 * not 0.
1211 	 */
1212 	if (nfs_ip_paranoia == 0)
1213 		args.flags |= NFSMNT_NOCONN;
1214 
1215 	if (has_nfs_args_opt != 0) {
1216 		/*
1217 		 * In the 'nfs_args' case, the pointers in the args
1218 		 * structure are in userland - we copy them in here.
1219 		 */
1220 		if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) {
1221 			vfs_mount_error(mp, "Bad file handle");
1222 			error = EINVAL;
1223 			goto out;
1224 		}
1225 		error = copyin((caddr_t)args.fh, (caddr_t)nfh,
1226 		    args.fhsize);
1227 		if (error != 0)
1228 			goto out;
1229 		error = copyinstr(args.hostname, hst, MNAMELEN - 1, &hstlen);
1230 		if (error != 0)
1231 			goto out;
1232 		bzero(&hst[hstlen], MNAMELEN - hstlen);
1233 		args.hostname = hst;
1234 		/* getsockaddr() call must be after above copyin() calls */
1235 		error = getsockaddr(&nam, (caddr_t)args.addr,
1236 		    args.addrlen);
1237 		if (error != 0)
1238 			goto out;
1239 	} else if (nfs_mount_parse_from(mp->mnt_optnew,
1240 	    &args.hostname, (struct sockaddr_in **)&nam, dirpath,
1241 	    sizeof(dirpath), &dirlen) == 0) {
1242 		has_nfs_from_opt = 1;
1243 		bcopy(args.hostname, hst, MNAMELEN);
1244 		hst[MNAMELEN - 1] = '\0';
1245 
1246 		/*
1247 		 * This only works with NFSv4 for now.
1248 		 */
1249 		args.fhsize = 0;
1250 		args.flags |= NFSMNT_NFSV4;
1251 		args.sotype = SOCK_STREAM;
1252 	} else {
1253 		if (vfs_getopt(mp->mnt_optnew, "fh", (void **)&args.fh,
1254 		    &args.fhsize) == 0) {
1255 			if (args.fhsize < 0 || args.fhsize > NFSX_FHMAX) {
1256 				vfs_mount_error(mp, "Bad file handle");
1257 				error = EINVAL;
1258 				goto out;
1259 			}
1260 			bcopy(args.fh, nfh, args.fhsize);
1261 		} else {
1262 			args.fhsize = 0;
1263 		}
1264 		(void) vfs_getopt(mp->mnt_optnew, "hostname",
1265 		    (void **)&args.hostname, &len);
1266 		if (args.hostname == NULL) {
1267 			vfs_mount_error(mp, "Invalid hostname");
1268 			error = EINVAL;
1269 			goto out;
1270 		}
1271 		bcopy(args.hostname, hst, MNAMELEN);
1272 		hst[MNAMELEN - 1] = '\0';
1273 	}
1274 
1275 	if (vfs_getopt(mp->mnt_optnew, "principal", (void **)&name, NULL) == 0)
1276 		strlcpy(srvkrbname, name, sizeof (srvkrbname));
1277 	else {
1278 		snprintf(srvkrbname, sizeof (srvkrbname), "nfs@%s", hst);
1279 		cp = strchr(srvkrbname, ':');
1280 		if (cp != NULL)
1281 			*cp = '\0';
1282 	}
1283 	srvkrbnamelen = strlen(srvkrbname);
1284 
1285 	if (vfs_getopt(mp->mnt_optnew, "gssname", (void **)&name, NULL) == 0)
1286 		strlcpy(krbname, name, sizeof (krbname));
1287 	else
1288 		krbname[0] = '\0';
1289 	krbnamelen = strlen(krbname);
1290 
1291 	if (has_nfs_from_opt == 0) {
1292 		if (vfs_getopt(mp->mnt_optnew,
1293 		    "dirpath", (void **)&name, NULL) == 0)
1294 			strlcpy(dirpath, name, sizeof (dirpath));
1295 		else
1296 			dirpath[0] = '\0';
1297 		dirlen = strlen(dirpath);
1298 	}
1299 
1300 	if (has_nfs_args_opt == 0 && has_nfs_from_opt == 0) {
1301 		if (vfs_getopt(mp->mnt_optnew, "addr",
1302 		    (void **)&args.addr, &args.addrlen) == 0) {
1303 			if (args.addrlen > SOCK_MAXADDRLEN) {
1304 				error = ENAMETOOLONG;
1305 				goto out;
1306 			}
1307 			nam = malloc(args.addrlen, M_SONAME, M_WAITOK);
1308 			bcopy(args.addr, nam, args.addrlen);
1309 			nam->sa_len = args.addrlen;
1310 		} else {
1311 			vfs_mount_error(mp, "No server address");
1312 			error = EINVAL;
1313 			goto out;
1314 		}
1315 	}
1316 
1317 	args.fh = nfh;
1318 	error = mountnfs(&args, mp, nam, hst, krbname, krbnamelen, dirpath,
1319 	    dirlen, srvkrbname, srvkrbnamelen, &vp, td->td_ucred, td,
1320 	    nametimeo, negnametimeo, minvers);
1321 out:
1322 	if (!error) {
1323 		MNT_ILOCK(mp);
1324 		mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_NO_IOPF |
1325 		    MNTK_USES_BCACHE;
1326 		MNT_IUNLOCK(mp);
1327 	}
1328 	return (error);
1329 }
1330 
1331 
1332 /*
1333  * VFS Operations.
1334  *
1335  * mount system call
1336  * It seems a bit dumb to copyinstr() the host and path here and then
1337  * bcopy() them in mountnfs(), but I wanted to detect errors before
1338  * doing the getsockaddr() call because getsockaddr() allocates an mbuf and
1339  * an error after that means that I have to release the mbuf.
1340  */
1341 /* ARGSUSED */
1342 static int
nfs_cmount(struct mntarg * ma,void * data,uint64_t flags)1343 nfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
1344 {
1345 	int error;
1346 	struct nfs_args args;
1347 
1348 	error = copyin(data, &args, sizeof (struct nfs_args));
1349 	if (error)
1350 		return error;
1351 
1352 	ma = mount_arg(ma, "nfs_args", &args, sizeof args);
1353 
1354 	error = kernel_mount(ma, flags);
1355 	return (error);
1356 }
1357 
/*
 * Common code for mount and mountroot
 *
 * Allocates and fills in a new struct nfsmount for "mp", connects to the
 * server and obtains the root vnode of the mount.
 *
 * argp		- mount arguments; decoded by nfs_decode_args().  argp->fh
 *		  and argp->fhsize supply the root file handle, argp->sotype
 *		  and argp->proto the transport.
 * mp		- the mount point; mp->mnt_data is set to the new nfsmount.
 * nam		- server address.  On success it is kept in nm_nam; on
 *		  failure, and on the MNT_UPDATE early return, it is freed
 *		  here.
 * hst		- host name; MNAMELEN bytes are copied to f_mntfromname.
 * krbname, dirpath, srvkrbname (and their lengths)
 *		- names copied into the name area at the end of the nfsmount.
 * vpp		- on success, receives the root vnode, referenced but
 *		  unlocked.  It is not written on the MNT_UPDATE early return
 *		  or on failure before the root vnode is obtained.
 * cred, td	- credentials and thread used for the connection and RPCs.
 * nametimeo, negnametimeo
 *		- stored in the nfsmount before nfs_decode_args() is called.
 * minvers	- NFSv4 minor version; only used when NFSMNT_NFSV4 is set.
 *
 * Returns 0 on success.  On failure everything set up here is torn down at
 * the "bad" label and the error is returned.
 */
static int
mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
    char *hst, u_char *krbname, int krbnamelen, u_char *dirpath, int dirlen,
    u_char *srvkrbname, int srvkrbnamelen, struct vnode **vpp,
    struct ucred *cred, struct thread *td, int nametimeo, int negnametimeo,
    int minvers)
{
	struct nfsmount *nmp;
	struct nfsnode *np;
	int error, trycnt, ret;
	struct nfsvattr nfsva;
	struct nfsclclient *clp;
	struct nfsclds *dsp, *tdsp;
	uint32_t lease;
	/* Seeded from nfsboottime on first use, then bumped for every mount. */
	static u_int64_t clval = 0;

	NFSCL_DEBUG(3, "in mnt\n");
	clp = NULL;
	if (mp->mnt_flag & MNT_UPDATE) {
		/* Updates are not done here: log it, drop "nam" and succeed. */
		nmp = VFSTONFS(mp);
		printf("%s: MNT_UPDATE is no longer handled here\n", __func__);
		FREE(nam, M_SONAME);
		return (0);
	} else {
		/*
		 * The allocation is zeroed and has room after the structure
		 * for the three names.
		 * NOTE(review): the "+ 2" presumably relies on nm_name already
		 * contributing at least one byte to sizeof(struct nfsmount),
		 * since three terminators are written below - confirm.
		 */
		MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount) +
		    krbnamelen + dirlen + srvkrbnamelen + 2,
		    M_NEWNFSMNT, M_WAITOK | M_ZERO);
		TAILQ_INIT(&nmp->nm_bufq);
		if (clval == 0)
			clval = (u_int64_t)nfsboottime.tv_sec;
		nmp->nm_clval = clval++;
		nmp->nm_krbnamelen = krbnamelen;
		nmp->nm_dirpathlen = dirlen;
		nmp->nm_srvkrbnamelen = srvkrbnamelen;
		if (td->td_ucred->cr_uid != (uid_t)0) {
			/*
			 * nm_uid is used to get KerberosV credentials for
			 * the nfsv4 state handling operations if there is
			 * no host based principal set. Use the uid of
			 * this user if not root, since they are doing the
			 * mount. I don't think setting this for root will
			 * work, since root normally does not have user
			 * credentials in a credentials cache.
			 */
			nmp->nm_uid = td->td_ucred->cr_uid;
		} else {
			/*
			 * Just set to -1, so it won't be used.
			 */
			nmp->nm_uid = (uid_t)-1;
		}

		/* Copy and null terminate all the names */
		if (nmp->nm_krbnamelen > 0) {
			bcopy(krbname, nmp->nm_krbname, nmp->nm_krbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen] = '\0';
		}
		if (nmp->nm_dirpathlen > 0) {
			bcopy(dirpath, NFSMNT_DIRPATH(nmp),
			    nmp->nm_dirpathlen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + 1] = '\0';
		}
		if (nmp->nm_srvkrbnamelen > 0) {
			bcopy(srvkrbname, NFSMNT_SRVKRBNAME(nmp),
			    nmp->nm_srvkrbnamelen);
			nmp->nm_name[nmp->nm_krbnamelen + nmp->nm_dirpathlen
			    + nmp->nm_srvkrbnamelen + 2] = '\0';
		}
		/* Take a credential reference; it is dropped again at "bad". */
		nmp->nm_sockreq.nr_cred = crhold(cred);
		mtx_init(&nmp->nm_sockreq.nr_mtx, "nfssock", NULL, MTX_DEF);
		mp->mnt_data = nmp;
		nmp->nm_getinfo = nfs_getnlminfo;
		nmp->nm_vinvalbuf = ncl_vinvalbuf;
	}
	vfs_getnewfsid(mp);
	nmp->nm_mountp = mp;
	mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK);

	/*
	 * Since nfs_decode_args() might optionally set them, these
	 * need to be set to defaults before the call, so that the
	 * optional settings aren't overwritten.
	 */
	nmp->nm_nametimeo = nametimeo;
	nmp->nm_negnametimeo = negnametimeo;
	nmp->nm_timeo = NFS_TIMEO;
	nmp->nm_retry = NFS_RETRANS;
	nmp->nm_readahead = NFS_DEFRAHEAD;

	/* This is empirical approximation of sqrt(hibufspace) * 256. */
	nmp->nm_wcommitsize = NFS_MAXBSIZE / 256;
	while ((long)nmp->nm_wcommitsize * nmp->nm_wcommitsize < hibufspace)
		nmp->nm_wcommitsize *= 2;
	nmp->nm_wcommitsize *= 256;

	/* The minor version only applies to NFSv4 mounts. */
	if ((argp->flags & NFSMNT_NFSV4) != 0)
		nmp->nm_minorvers = minvers;
	else
		nmp->nm_minorvers = 0;

	nfs_decode_args(mp, nmp, argp, hst, cred, td);

	/*
	 * V2 can only handle 32 bit filesizes.  A 4GB-1 limit may be too
	 * high, depending on whether we end up with negative offsets in
	 * the client or server somewhere.  2GB-1 may be safer.
	 *
	 * For V3, ncl_fsinfo will adjust this as necessary.  Assume maximum
	 * that we can handle until we find out otherwise.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0)
		nmp->nm_maxfilesize = 0xffffffffLL;
	else
		nmp->nm_maxfilesize = OFF_MAX;

	/*
	 * With neither the V3 nor the V4 flag set, the transfer sizes are
	 * forced back to the defaults after nfs_decode_args() has run.
	 */
	if ((argp->flags & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0) {
		nmp->nm_wsize = NFS_WSIZE;
		nmp->nm_rsize = NFS_RSIZE;
		nmp->nm_readdirsize = NFS_READDIRSIZE;
	}
	nmp->nm_numgrps = NFS_MAXGRPS;
	/* Negative tprintf delays are clamped to zero. */
	nmp->nm_tprintf_delay = nfs_tprintf_delay;
	if (nmp->nm_tprintf_delay < 0)
		nmp->nm_tprintf_delay = 0;
	nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay;
	if (nmp->nm_tprintf_initial_delay < 0)
		nmp->nm_tprintf_initial_delay = 0;
	nmp->nm_fhsize = argp->fhsize;
	if (nmp->nm_fhsize > 0)
		bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize);
	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
	nmp->nm_nam = nam;
	/* Set up the sockets and per-host congestion */
	nmp->nm_sotype = argp->sotype;
	nmp->nm_soproto = argp->proto;
	nmp->nm_sockreq.nr_prog = NFS_PROG;
	if ((argp->flags & NFSMNT_NFSV4))
		nmp->nm_sockreq.nr_vers = NFS_VER4;
	else if ((argp->flags & NFSMNT_NFSV3))
		nmp->nm_sockreq.nr_vers = NFS_VER3;
	else
		nmp->nm_sockreq.nr_vers = NFS_VER2;


	if ((error = newnfs_connect(nmp, &nmp->nm_sockreq, cred, td, 0)))
		goto bad;
	/* For NFSv4.1, get the clientid now. */
	if (nmp->nm_minorvers > 0) {
		NFSCL_DEBUG(3, "at getcl\n");
		error = nfscl_getcl(mp, cred, td, 0, &clp);
		NFSCL_DEBUG(3, "aft getcl=%d\n", error);
		if (error != 0)
			goto bad;
	}

	if (nmp->nm_fhsize == 0 && (nmp->nm_flag & NFSMNT_NFSV4) &&
	    nmp->nm_dirpathlen > 0) {
		NFSCL_DEBUG(3, "in dirp\n");
		/*
		 * If the fhsize on the mount point == 0 for V4, the mount
		 * path needs to be looked up.
		 */
		/* Up to three attempts, napping after each failed one. */
		trycnt = 3;
		do {
			error = nfsrpc_getdirpath(nmp, NFSMNT_DIRPATH(nmp),
			    cred, td);
			NFSCL_DEBUG(3, "aft dirp=%d\n", error);
			if (error)
				(void) nfs_catnap(PZERO, error, "nfsgetdirp");
		} while (error && --trycnt > 0);
		if (error) {
			error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
			goto bad;
		}
	}

	/*
	 * A reference count is needed on the nfsnode representing the
	 * remote root.  If this object is not persistent, then backward
	 * traversals of the mount point (i.e. "..") will not work if
	 * the nfsnode gets flushed out of the cache. Ufs does not have
	 * this problem, because one can identify root inodes by their
	 * number == ROOTINO (2).
	 */
	if (nmp->nm_fhsize > 0) {
		/*
		 * Set f_iosize to NFS_DIRBLKSIZ so that bo_bsize gets set
		 * non-zero for the root vnode. f_iosize will be set correctly
		 * by nfs_statfs() before any I/O occurs.
		 */
		mp->mnt_stat.f_iosize = NFS_DIRBLKSIZ;
		error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np,
		    LK_EXCLUSIVE);
		if (error)
			goto bad;
		*vpp = NFSTOV(np);

		/*
		 * Get file attributes and transfer parameters for the
		 * mountpoint.  This has the side effect of filling in
		 * (*vpp)->v_type with the correct value.
		 */
		/* A failure here is not fatal; "ret" is checked again below. */
		ret = nfsrpc_getattrnovp(nmp, nmp->nm_fh, nmp->nm_fhsize, 1,
		    cred, td, &nfsva, NULL, &lease);
		if (ret) {
			/*
			 * Just set default values to get things going.
			 */
			NFSBZERO((caddr_t)&nfsva, sizeof (struct nfsvattr));
			nfsva.na_vattr.va_type = VDIR;
			nfsva.na_vattr.va_mode = 0777;
			nfsva.na_vattr.va_nlink = 100;
			nfsva.na_vattr.va_uid = (uid_t)0;
			nfsva.na_vattr.va_gid = (gid_t)0;
			nfsva.na_vattr.va_fileid = 2;
			nfsva.na_vattr.va_gen = 1;
			nfsva.na_vattr.va_blocksize = NFS_FABLKSIZE;
			nfsva.na_vattr.va_size = 512 * 1024;
			lease = 60;
		}
		(void) nfscl_loadattrcache(vpp, &nfsva, NULL, NULL, 0, 1);
		if (nmp->nm_minorvers > 0) {
			NFSCL_DEBUG(3, "lease=%d\n", (int)lease);
			NFSLOCKCLSTATE();
			clp->nfsc_renew = NFSCL_RENEW(lease);
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			/* Bump the revision, skipping zero if it wraps. */
			clp->nfsc_clientidrev++;
			if (clp->nfsc_clientidrev == 0)
				clp->nfsc_clientidrev++;
			NFSUNLOCKCLSTATE();
			/*
			 * Mount will succeed, so the renew thread can be
			 * started now.
			 */
			nfscl_start_renewthread(clp);
			nfscl_clientrelease(clp);
		}
		if (argp->flags & NFSMNT_NFSV3)
			ncl_fsinfo(nmp, *vpp, cred, td);

		/* Mark if the mount point supports NFSv4 ACLs. */
		if ((argp->flags & NFSMNT_NFSV4) != 0 && nfsrv_useacl != 0 &&
		    ret == 0 &&
		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL)) {
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}

		/*
		 * Lose the lock but keep the ref.
		 */
		NFSVOPUNLOCK(*vpp, 0);
		return (0);
	}
	/* No root file handle was supplied or looked up: fail the mount. */
	error = EIO;

bad:
	/*
	 * Undo everything set up above: the client reference, the
	 * connection, the credential reference, the auth handle, both
	 * mutexes, any client structure and sessions hanging off the mount,
	 * and finally the nfsmount and the server address themselves.
	 */
	if (clp != NULL)
		nfscl_clientrelease(clp);
	newnfs_disconnect(&nmp->nm_sockreq);
	crfree(nmp->nm_sockreq.nr_cred);
	if (nmp->nm_sockreq.nr_auth != NULL)
		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
	mtx_destroy(&nmp->nm_mtx);
	if (nmp->nm_clp != NULL) {
		NFSLOCKCLSTATE();
		LIST_REMOVE(nmp->nm_clp, nfsc_list);
		NFSUNLOCKCLSTATE();
		free(nmp->nm_clp, M_NFSCLCLIENT);
	}
	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
		nfscl_freenfsclds(dsp);
	FREE(nmp, M_NEWNFSMNT);
	FREE(nam, M_SONAME);
	return (error);
}
1640 
1641 /*
1642  * unmount system call
1643  */
1644 static int
nfs_unmount(struct mount * mp,int mntflags)1645 nfs_unmount(struct mount *mp, int mntflags)
1646 {
1647 	struct thread *td;
1648 	struct nfsmount *nmp;
1649 	int error, flags = 0, i, trycnt = 0;
1650 	struct nfsclds *dsp, *tdsp;
1651 
1652 	td = curthread;
1653 
1654 	if (mntflags & MNT_FORCE)
1655 		flags |= FORCECLOSE;
1656 	nmp = VFSTONFS(mp);
1657 	/*
1658 	 * Goes something like this..
1659 	 * - Call vflush() to clear out vnodes for this filesystem
1660 	 * - Close the socket
1661 	 * - Free up the data structures
1662 	 */
1663 	/* In the forced case, cancel any outstanding requests. */
1664 	if (mntflags & MNT_FORCE) {
1665 		error = newnfs_nmcancelreqs(nmp);
1666 		if (error)
1667 			goto out;
1668 		/* For a forced close, get rid of the renew thread now */
1669 		nfscl_umount(nmp, td);
1670 	}
1671 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
1672 	do {
1673 		error = vflush(mp, 1, flags, td);
1674 		if ((mntflags & MNT_FORCE) && error != 0 && ++trycnt < 30)
1675 			(void) nfs_catnap(PSOCK, error, "newndm");
1676 	} while ((mntflags & MNT_FORCE) && error != 0 && trycnt < 30);
1677 	if (error)
1678 		goto out;
1679 
1680 	/*
1681 	 * We are now committed to the unmount.
1682 	 */
1683 	if ((mntflags & MNT_FORCE) == 0)
1684 		nfscl_umount(nmp, td);
1685 	/* Make sure no nfsiods are assigned to this mount. */
1686 	mtx_lock(&ncl_iod_mutex);
1687 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
1688 		if (ncl_iodmount[i] == nmp) {
1689 			ncl_iodwant[i] = NFSIOD_AVAILABLE;
1690 			ncl_iodmount[i] = NULL;
1691 		}
1692 	mtx_unlock(&ncl_iod_mutex);
1693 	newnfs_disconnect(&nmp->nm_sockreq);
1694 	crfree(nmp->nm_sockreq.nr_cred);
1695 	FREE(nmp->nm_nam, M_SONAME);
1696 	if (nmp->nm_sockreq.nr_auth != NULL)
1697 		AUTH_DESTROY(nmp->nm_sockreq.nr_auth);
1698 	mtx_destroy(&nmp->nm_sockreq.nr_mtx);
1699 	mtx_destroy(&nmp->nm_mtx);
1700 	TAILQ_FOREACH_SAFE(dsp, &nmp->nm_sess, nfsclds_list, tdsp)
1701 		nfscl_freenfsclds(dsp);
1702 	FREE(nmp, M_NEWNFSMNT);
1703 out:
1704 	return (error);
1705 }
1706 
1707 /*
1708  * Return root of a filesystem
1709  */
1710 static int
nfs_root(struct mount * mp,int flags,struct vnode ** vpp)1711 nfs_root(struct mount *mp, int flags, struct vnode **vpp)
1712 {
1713 	struct vnode *vp;
1714 	struct nfsmount *nmp;
1715 	struct nfsnode *np;
1716 	int error;
1717 
1718 	nmp = VFSTONFS(mp);
1719 	error = ncl_nget(mp, nmp->nm_fh, nmp->nm_fhsize, &np, flags);
1720 	if (error)
1721 		return error;
1722 	vp = NFSTOV(np);
1723 	/*
1724 	 * Get transfer parameters and attributes for root vnode once.
1725 	 */
1726 	mtx_lock(&nmp->nm_mtx);
1727 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp)) {
1728 		mtx_unlock(&nmp->nm_mtx);
1729 		ncl_fsinfo(nmp, vp, curthread->td_ucred, curthread);
1730 	} else
1731 		mtx_unlock(&nmp->nm_mtx);
1732 	if (vp->v_type == VNON)
1733 	    vp->v_type = VDIR;
1734 	vp->v_vflag |= VV_ROOT;
1735 	*vpp = vp;
1736 	return (0);
1737 }
1738 
1739 /*
1740  * Flush out the buffer cache
1741  */
1742 /* ARGSUSED */
1743 static int
nfs_sync(struct mount * mp,int waitfor)1744 nfs_sync(struct mount *mp, int waitfor)
1745 {
1746 	struct vnode *vp, *mvp;
1747 	struct thread *td;
1748 	int error, allerror = 0;
1749 
1750 	td = curthread;
1751 
1752 	MNT_ILOCK(mp);
1753 	/*
1754 	 * If a forced dismount is in progress, return from here so that
1755 	 * the umount(2) syscall doesn't get stuck in VFS_SYNC() before
1756 	 * calling VFS_UNMOUNT().
1757 	 */
1758 	if ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) {
1759 		MNT_IUNLOCK(mp);
1760 		return (EBADF);
1761 	}
1762 	MNT_IUNLOCK(mp);
1763 
1764 	/*
1765 	 * Force stale buffer cache information to be flushed.
1766 	 */
1767 loop:
1768 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
1769 		/* XXX Racy bv_cnt check. */
1770 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
1771 		    waitfor == MNT_LAZY) {
1772 			VI_UNLOCK(vp);
1773 			continue;
1774 		}
1775 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
1776 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
1777 			goto loop;
1778 		}
1779 		error = VOP_FSYNC(vp, waitfor, td);
1780 		if (error)
1781 			allerror = error;
1782 		NFSVOPUNLOCK(vp, 0);
1783 		vrele(vp);
1784 	}
1785 	return (allerror);
1786 }
1787 
1788 static int
nfs_sysctl(struct mount * mp,fsctlop_t op,struct sysctl_req * req)1789 nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
1790 {
1791 	struct nfsmount *nmp = VFSTONFS(mp);
1792 	struct vfsquery vq;
1793 	int error;
1794 
1795 	bzero(&vq, sizeof(vq));
1796 	switch (op) {
1797 #if 0
1798 	case VFS_CTL_NOLOCKS:
1799 		val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0;
1800  		if (req->oldptr != NULL) {
1801  			error = SYSCTL_OUT(req, &val, sizeof(val));
1802  			if (error)
1803  				return (error);
1804  		}
1805  		if (req->newptr != NULL) {
1806  			error = SYSCTL_IN(req, &val, sizeof(val));
1807  			if (error)
1808  				return (error);
1809 			if (val)
1810 				nmp->nm_flag |= NFSMNT_NOLOCKS;
1811 			else
1812 				nmp->nm_flag &= ~NFSMNT_NOLOCKS;
1813  		}
1814 		break;
1815 #endif
1816 	case VFS_CTL_QUERY:
1817 		mtx_lock(&nmp->nm_mtx);
1818 		if (nmp->nm_state & NFSSTA_TIMEO)
1819 			vq.vq_flags |= VQ_NOTRESP;
1820 		mtx_unlock(&nmp->nm_mtx);
1821 #if 0
1822 		if (!(nmp->nm_flag & NFSMNT_NOLOCKS) &&
1823 		    (nmp->nm_state & NFSSTA_LOCKTIMEO))
1824 			vq.vq_flags |= VQ_NOTRESPLOCK;
1825 #endif
1826 		error = SYSCTL_OUT(req, &vq, sizeof(vq));
1827 		break;
1828  	case VFS_CTL_TIMEO:
1829  		if (req->oldptr != NULL) {
1830  			error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay,
1831  			    sizeof(nmp->nm_tprintf_initial_delay));
1832  			if (error)
1833  				return (error);
1834  		}
1835  		if (req->newptr != NULL) {
1836 			error = vfs_suser(mp, req->td);
1837 			if (error)
1838 				return (error);
1839  			error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay,
1840  			    sizeof(nmp->nm_tprintf_initial_delay));
1841  			if (error)
1842  				return (error);
1843  			if (nmp->nm_tprintf_initial_delay < 0)
1844  				nmp->nm_tprintf_initial_delay = 0;
1845  		}
1846 		break;
1847 	default:
1848 		return (ENOTSUP);
1849 	}
1850 	return (0);
1851 }
1852 
/*
 * Purge any RPCs in progress, so that they will all return errors.
 * This allows dounmount() to continue as far as VFS_UNMOUNT() for a
 * forced dismount.
 *
 * The result of the cancel is not reported to the caller.
 */
static void
nfs_purge(struct mount *mp)
{

	(void) newnfs_nmcancelreqs(VFSTONFS(mp));
}
1865 
1866 /*
1867  * Extract the information needed by the nlm from the nfs vnode.
1868  */
1869 static void
nfs_getnlminfo(struct vnode * vp,uint8_t * fhp,size_t * fhlenp,struct sockaddr_storage * sp,int * is_v3p,off_t * sizep,struct timeval * timeop)1870 nfs_getnlminfo(struct vnode *vp, uint8_t *fhp, size_t *fhlenp,
1871     struct sockaddr_storage *sp, int *is_v3p, off_t *sizep,
1872     struct timeval *timeop)
1873 {
1874 	struct nfsmount *nmp;
1875 	struct nfsnode *np = VTONFS(vp);
1876 
1877 	nmp = VFSTONFS(vp->v_mount);
1878 	if (fhlenp != NULL)
1879 		*fhlenp = (size_t)np->n_fhp->nfh_len;
1880 	if (fhp != NULL)
1881 		bcopy(np->n_fhp->nfh_fh, fhp, np->n_fhp->nfh_len);
1882 	if (sp != NULL)
1883 		bcopy(nmp->nm_nam, sp, uimin(nmp->nm_nam->sa_len, sizeof(*sp)));
1884 	if (is_v3p != NULL)
1885 		*is_v3p = NFS_ISV3(vp);
1886 	if (sizep != NULL)
1887 		*sizep = np->n_size;
1888 	if (timeop != NULL) {
1889 		timeop->tv_sec = nmp->nm_timeo / NFS_HZ;
1890 		timeop->tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
1891 	}
1892 }
1893 
/*
 * Conditionally append an option name to the output buffer.
 *
 * The name "opt" is written at *buf only when "testval" is non-zero and
 * the remaining space *blen is large enough for the name plus its
 * terminating NUL.  When it is written, *buf is advanced past the name
 * and *blen is reduced by the same amount; otherwise neither the buffer
 * nor the cursor is touched.  "nmp" is not used.
 */
static __inline void nfscl_printopt(struct nfsmount *nmp, int testval,
    char *opt, char **buf, size_t *blen)
{
	size_t optlen;
	int written;

	if (testval == 0)
		return;

	optlen = strlen(opt);
	if (*blen <= optlen)
		return;

	written = snprintf(*buf, *blen, "%s", opt);
	/* The size check above means the whole name should have fit. */
	if ((size_t)written != optlen)
		printf("EEK!!\n");
	*buf += written;
	*blen -= written;
}
1911 
/*
 * Append an option together with its integer value, formatted as
 * "opt=optval", to the output buffer.
 *
 * Nothing is attempted unless the remaining space *blen can hold at
 * least the name, the '=' and a terminating NUL.  When the complete
 * text fits, *buf is advanced past it and *blen is reduced by the same
 * amount.  When the text does not fit, the cursor is left where it was
 * and the partial output is discarded, so the buffer never ends with a
 * clipped (and therefore wrong) numeric value.  "nmp" is not used.
 */
static __inline void nfscl_printoptval(struct nfsmount *nmp, int optval,
    char *opt, char **buf, size_t *blen)
{
	int len;

	if (*blen <= strlen(opt) + 1)
		return;

	len = snprintf(*buf, *blen, "%s=%d", opt, optval);
	if (len >= 0 && (size_t)len < *blen) {
		*buf += len;
		*blen -= (size_t)len;
	} else {
		/*
		 * The value was cut short.  Re-terminate the string at the
		 * cursor so the truncated text is not left behind; *blen is
		 * at least 2 here, so this write is in bounds.
		 */
		**buf = '\0';
	}
}
1929 
1930 /*
1931  * Load the option flags and values into the buffer.
1932  */
nfscl_retopts(struct nfsmount * nmp,char * buffer,size_t buflen)1933 void nfscl_retopts(struct nfsmount *nmp, char *buffer, size_t buflen)
1934 {
1935 	char *buf;
1936 	size_t blen;
1937 
1938 	buf = buffer;
1939 	blen = buflen;
1940 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV4) != 0, "nfsv4", &buf,
1941 	    &blen);
1942 	if ((nmp->nm_flag & NFSMNT_NFSV4) != 0) {
1943 		nfscl_printoptval(nmp, nmp->nm_minorvers, ",minorversion", &buf,
1944 		    &blen);
1945 		nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_PNFS) != 0, ",pnfs",
1946 		    &buf, &blen);
1947 	}
1948 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NFSV3) != 0, "nfsv3", &buf,
1949 	    &blen);
1950 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4)) == 0,
1951 	    "nfsv2", &buf, &blen);
1952 	nfscl_printopt(nmp, nmp->nm_sotype == SOCK_STREAM, ",tcp", &buf, &blen);
1953 	nfscl_printopt(nmp, nmp->nm_sotype != SOCK_STREAM, ",udp", &buf, &blen);
1954 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RESVPORT) != 0, ",resvport",
1955 	    &buf, &blen);
1956 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCONN) != 0, ",noconn",
1957 	    &buf, &blen);
1958 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) == 0, ",hard", &buf,
1959 	    &blen);
1960 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_SOFT) != 0, ",soft", &buf,
1961 	    &blen);
1962 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_INT) != 0, ",intr", &buf,
1963 	    &blen);
1964 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) == 0, ",cto", &buf,
1965 	    &blen);
1966 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
1967 	    &blen);
1968 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
1969 	    ",noncontigwr", &buf, &blen);
1970 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1971 	    0, ",lockd", &buf, &blen);
1972 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
1973 	    NFSMNT_NOLOCKD, ",nolockd", &buf, &blen);
1974 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_RDIRPLUS) != 0, ",rdirplus",
1975 	    &buf, &blen);
1976 	nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_KERB) == 0, ",sec=sys",
1977 	    &buf, &blen);
1978 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1979 	    NFSMNT_PRIVACY)) == NFSMNT_KERB, ",sec=krb5", &buf, &blen);
1980 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1981 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_INTEGRITY), ",sec=krb5i",
1982 	    &buf, &blen);
1983 	nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_KERB | NFSMNT_INTEGRITY |
1984 	    NFSMNT_PRIVACY)) == (NFSMNT_KERB | NFSMNT_PRIVACY), ",sec=krb5p",
1985 	    &buf, &blen);
1986 	nfscl_printoptval(nmp, nmp->nm_acdirmin, ",acdirmin", &buf, &blen);
1987 	nfscl_printoptval(nmp, nmp->nm_acdirmax, ",acdirmax", &buf, &blen);
1988 	nfscl_printoptval(nmp, nmp->nm_acregmin, ",acregmin", &buf, &blen);
1989 	nfscl_printoptval(nmp, nmp->nm_acregmax, ",acregmax", &buf, &blen);
1990 	nfscl_printoptval(nmp, nmp->nm_nametimeo, ",nametimeo", &buf, &blen);
1991 	nfscl_printoptval(nmp, nmp->nm_negnametimeo, ",negnametimeo", &buf,
1992 	    &blen);
1993 	nfscl_printoptval(nmp, nmp->nm_rsize, ",rsize", &buf, &blen);
1994 	nfscl_printoptval(nmp, nmp->nm_wsize, ",wsize", &buf, &blen);
1995 	nfscl_printoptval(nmp, nmp->nm_readdirsize, ",readdirsize", &buf,
1996 	    &blen);
1997 	nfscl_printoptval(nmp, nmp->nm_readahead, ",readahead", &buf, &blen);
1998 	nfscl_printoptval(nmp, nmp->nm_wcommitsize, ",wcommitsize", &buf,
1999 	    &blen);
2000 	nfscl_printoptval(nmp, nmp->nm_timeo, ",timeout", &buf, &blen);
2001 	nfscl_printoptval(nmp, nmp->nm_retry, ",retrans", &buf, &blen);
2002 }
2003 
2004