xref: /original-bsd/sys/nfs/nfs_vfsops.c (revision 460516e7)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)nfs_vfsops.c	7.31 (Berkeley) 05/06/91
11  */
12 
13 #include "param.h"
14 #include "conf.h"
15 #include "ioctl.h"
16 #include "signal.h"
17 #include "proc.h"
18 #include "namei.h"
19 #include "vnode.h"
20 #include "mount.h"
21 #include "buf.h"
22 #include "mbuf.h"
23 #include "socket.h"
24 #include "systm.h"
25 
26 #include "../net/if.h"
27 #include "../net/route.h"
28 #include "../netinet/in.h"
29 
30 #include "nfsv2.h"
31 #include "nfsnode.h"
32 #include "nfsmount.h"
33 #include "nfs.h"
34 #include "xdr_subs.h"
35 #include "nfsm_subs.h"
36 #include "nfsdiskless.h"
37 
38 /*
39  * nfs vfs operations.
40  */
41 struct vfsops nfs_vfsops = {
42 	nfs_mount,
43 	nfs_start,
44 	nfs_unmount,
45 	nfs_root,
46 	nfs_quotactl,
47 	nfs_statfs,
48 	nfs_sync,
49 	nfs_fhtovp,
50 	nfs_vptofh,
51 	nfs_init,
52 };
53 
54 static u_char nfs_mntid;
55 extern u_long nfs_procids[NFS_NPROCS];
56 extern u_long nfs_prog, nfs_vers;
57 struct nfs_diskless nfs_diskless;
58 void nfs_disconnect();
59 
60 #define TRUE	1
61 #define	FALSE	0
62 
63 /*
64  * nfs statfs call
65  */
66 nfs_statfs(mp, sbp, p)
67 	struct mount *mp;
68 	register struct statfs *sbp;
69 	struct proc *p;
70 {
71 	register struct vnode *vp;
72 	register struct nfsv2_statfs *sfp;
73 	register caddr_t cp;
74 	register long t1;
75 	caddr_t bpos, dpos, cp2;
76 	u_long xid;
77 	int error = 0;
78 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
79 	struct nfsmount *nmp;
80 	struct ucred *cred;
81 	struct nfsnode *np;
82 
83 	nmp = VFSTONFS(mp);
84 	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
85 		return (error);
86 	vp = NFSTOV(np);
87 	nfsstats.rpccnt[NFSPROC_STATFS]++;
88 	cred = crget();
89 	cred->cr_ngroups = 1;
90 	nfsm_reqhead(nfs_procids[NFSPROC_STATFS], cred, NFSX_FH);
91 	nfsm_fhtom(vp);
92 	nfsm_request(vp, NFSPROC_STATFS, p, 0);
93 	nfsm_disect(sfp, struct nfsv2_statfs *, NFSX_STATFS);
94 	sbp->f_type = MOUNT_NFS;
95 	sbp->f_flags = nmp->nm_flag;
96 	sbp->f_bsize = fxdr_unsigned(long, sfp->sf_tsize);
97 	sbp->f_fsize = fxdr_unsigned(long, sfp->sf_bsize);
98 	sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks);
99 	sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree);
100 	sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail);
101 	sbp->f_files = 0;
102 	sbp->f_ffree = 0;
103 	if (sbp != &mp->mnt_stat) {
104 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
105 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
106 	}
107 	nfsm_reqdone;
108 	nfs_nput(vp);
109 	crfree(cred);
110 	return (error);
111 }
112 
113 /*
114  * Mount a remote root fs via. nfs. This depends on the info in the
115  * nfs_diskless structure that has been filled in properly by some primary
116  * bootstrap.
117  * It goes something like this:
118  * - do enough of "ifconfig" by calling ifioctl() so that the system
119  *   can talk to the server
120  * - If nfs_diskless.mygateway is filled in, use that address as
121  *   a default gateway.
122  *   (This is done the 4.3 way with rtioctl() and should be changed)
123  * - hand craft the swap nfs vnode hanging off a fake mount point
124  * - build the rootfs mount point and call mountnfs() to do the rest.
125  */
126 nfs_mountroot()
127 {
128 	register struct mount *mp;
129 	register struct mbuf *m;
130 	struct socket *so;
131 	struct vnode *vp;
132 	int error;
133 
134 	/*
135 	 * Do enough of ifconfig(8) so that critical net interface can
136 	 * talk to the server.
137 	 */
138 	if (socreate(nfs_diskless.myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0))
139 		panic("nfs ifconf");
140 	if (ifioctl(so, SIOCAIFADDR, &nfs_diskless.myif))
141 		panic("nfs ifconf2");
142 	soclose(so);
143 
144 	/*
145 	 * If the gateway field is filled in, set it as the default route.
146 	 */
147 #ifdef COMPAT_43
148 	if (nfs_diskless.mygateway.sa_family == AF_INET) {
149 		struct ortentry rt;
150 		struct sockaddr_in *sin;
151 
152 		sin = (struct sockaddr_in *) &rt.rt_dst;
153 		sin->sin_len = sizeof (struct sockaddr_in);
154 		sin->sin_family = AF_INET;
155 		sin->sin_addr.s_addr = 0;	/* default */
156 		bcopy((caddr_t)&nfs_diskless.mygateway, (caddr_t)&rt.rt_gateway,
157 			sizeof (struct sockaddr_in));
158 		rt.rt_flags = (RTF_UP | RTF_GATEWAY);
159 		if (rtioctl(SIOCADDRT, (caddr_t)&rt))
160 			panic("nfs root route");
161 	}
162 #endif	/* COMPAT_43 */
163 
164 	/*
165 	 * If swapping to an nfs node (indicated by swdevt[0].sw_dev == NODEV):
166 	 * Create a fake mount point just for the swap vnode so that the
167 	 * swap file can be on a different server from the rootfs.
168 	 */
169 	if (swdevt[0].sw_dev == NODEV) {
170 		mp = (struct mount *)malloc((u_long)sizeof(struct mount),
171 			M_MOUNT, M_NOWAIT);
172 		if (mp == NULL)
173 			panic("nfs root mount");
174 		mp->mnt_op = &nfs_vfsops;
175 		mp->mnt_flag = 0;
176 		mp->mnt_exroot = 0;
177 		mp->mnt_mounth = NULLVP;
178 
179 		/*
180 		 * Set up the diskless nfs_args for the swap mount point
181 		 * and then call mountnfs() to mount it.
182 		 * Since the swap file is not the root dir of a file system,
183 		 * hack it to a regular file.
184 		 */
185 		nfs_diskless.swap_args.fh = (nfsv2fh_t *)nfs_diskless.swap_fh;
186 		MGET(m, MT_SONAME, M_DONTWAIT);
187 		if (m == NULL)
188 			panic("nfs root mbuf");
189 		bcopy((caddr_t)&nfs_diskless.swap_saddr, mtod(m, caddr_t),
190 			nfs_diskless.swap_saddr.sa_len);
191 		m->m_len = nfs_diskless.swap_saddr.sa_len;
192 		if (mountnfs(&nfs_diskless.swap_args, mp, m, "/swap",
193 			nfs_diskless.swap_hostnam, &vp))
194 			panic("nfs swap");
195 		vp->v_type = VREG;
196 		vp->v_flag = 0;
197 		swapdev_vp = vp;
198 		VREF(vp);
199 		swdevt[0].sw_vp = vp;
200 	}
201 
202 	/*
203 	 * Create the rootfs mount point.
204 	 */
205 	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
206 		M_MOUNT, M_NOWAIT);
207 	if (mp == NULL)
208 		panic("nfs root mount2");
209 	mp->mnt_op = &nfs_vfsops;
210 	mp->mnt_flag = MNT_RDONLY;
211 	mp->mnt_exroot = 0;
212 	mp->mnt_mounth = NULLVP;
213 
214 	/*
215 	 * Set up the root fs args and call mountnfs() to do the rest.
216 	 */
217 	nfs_diskless.root_args.fh = (nfsv2fh_t *)nfs_diskless.root_fh;
218 	MGET(m, MT_SONAME, M_DONTWAIT);
219 	if (m == NULL)
220 		panic("nfs root mbuf2");
221 	bcopy((caddr_t)&nfs_diskless.root_saddr, mtod(m, caddr_t),
222 		nfs_diskless.root_saddr.sa_len);
223 	m->m_len = nfs_diskless.root_saddr.sa_len;
224 	if (mountnfs(&nfs_diskless.root_args, mp, m, "/",
225 		nfs_diskless.root_hostnam, &vp))
226 		panic("nfs root");
227 	if (vfs_lock(mp))
228 		panic("nfs root2");
229 	rootfs = mp;
230 	mp->mnt_next = mp;
231 	mp->mnt_prev = mp;
232 	mp->mnt_vnodecovered = NULLVP;
233 	vfs_unlock(mp);
234 	rootvp = vp;
235 	inittodr((time_t)0);	/* There is no time in the nfs fsstat so ?? */
236 	return (0);
237 }
238 
239 /*
240  * VFS Operations.
241  *
242  * mount system call
243  * It seems a bit dumb to copyinstr() the host and path here and then
244  * bcopy() them in mountnfs(), but I wanted to detect errors before
245  * doing the sockargs() call because sockargs() allocates an mbuf and
246  * an error after that means that I have to release the mbuf.
247  */
248 /* ARGSUSED */
249 nfs_mount(mp, path, data, ndp, p)
250 	struct mount *mp;
251 	char *path;
252 	caddr_t data;
253 	struct nameidata *ndp;
254 	struct proc *p;
255 {
256 	int error;
257 	struct nfs_args args;
258 	struct mbuf *nam;
259 	struct vnode *vp;
260 	char pth[MNAMELEN], hst[MNAMELEN];
261 	u_int len;
262 	nfsv2fh_t nfh;
263 
264 	if (mp->mnt_flag & MNT_UPDATE)
265 		return (0);
266 	if (error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)))
267 		return (error);
268 	if (error = copyin((caddr_t)args.fh, (caddr_t)&nfh, sizeof (nfsv2fh_t)))
269 		return (error);
270 	if (error = copyinstr(path, pth, MNAMELEN-1, &len))
271 		return (error);
272 	bzero(&pth[len], MNAMELEN - len);
273 	if (error = copyinstr(args.hostname, hst, MNAMELEN-1, &len))
274 		return (error);
275 	bzero(&hst[len], MNAMELEN - len);
276 	/* sockargs() call must be after above copyin() calls */
277 	if (error = sockargs(&nam, (caddr_t)args.addr,
278 		sizeof (struct sockaddr), MT_SONAME))
279 		return (error);
280 	args.fh = &nfh;
281 	error = mountnfs(&args, mp, nam, pth, hst, &vp);
282 	return (error);
283 }
284 
285 /*
286  * Common code for mount and mountroot
287  */
288 mountnfs(argp, mp, nam, pth, hst, vpp)
289 	register struct nfs_args *argp;
290 	register struct mount *mp;
291 	struct mbuf *nam;
292 	char *pth, *hst;
293 	struct vnode **vpp;
294 {
295 	register struct nfsmount *nmp;
296 	struct proc *p = curproc;		/* XXX */
297 	struct nfsnode *np;
298 	int error;
299 	fsid_t tfsid;
300 
301 	MALLOC(nmp, struct nfsmount *, sizeof *nmp, M_NFSMNT, M_WAITOK);
302 	bzero((caddr_t)nmp, sizeof *nmp);
303 	mp->mnt_data = (qaddr_t)nmp;
304 	/*
305 	 * Generate a unique nfs mount id. The problem is that a dev number
306 	 * is not unique across multiple systems. The techique is as follows:
307 	 * 1) Set to nblkdev,0 which will never be used otherwise
308 	 * 2) Generate a first guess as nblkdev,nfs_mntid where nfs_mntid is
309 	 *	NOT 0
310 	 * 3) Loop searching the mount list for another one with same id
311 	 *	If a match, increment val[0] and try again
312 	 * NB: I increment val[0] { a long } instead of nfs_mntid { a u_char }
313 	 *	so that nfs is not limited to 255 mount points
314 	 *     Incrementing the high order bits does no real harm, since it
315 	 *     simply makes the major dev number tick up. The upper bound is
316 	 *     set to major dev 127 to avoid any sign extention problems
317 	 */
318 	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev, 0);
319 	mp->mnt_stat.f_fsid.val[1] = MOUNT_NFS;
320 	if (++nfs_mntid == 0)
321 		++nfs_mntid;
322 	tfsid.val[0] = makedev(nblkdev, nfs_mntid);
323 	tfsid.val[1] = MOUNT_NFS;
324 	while (rootfs && getvfs(&tfsid)) {
325 		tfsid.val[0]++;
326 		nfs_mntid++;
327 	}
328 	if (major(tfsid.val[0]) > 127) {
329 		error = ENOENT;
330 		goto bad;
331 	}
332 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
333 	nmp->nm_mountp = mp;
334 	nmp->nm_flag = argp->flags;
335 	nmp->nm_rto = NFS_TIMEO;
336 	nmp->nm_rtt = -1;
337 	nmp->nm_rttvar = nmp->nm_rto << 1;
338 	nmp->nm_retry = NFS_RETRANS;
339 	nmp->nm_wsize = NFS_WSIZE;
340 	nmp->nm_rsize = NFS_RSIZE;
341 	bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t));
342 	mp->mnt_stat.f_type = MOUNT_NFS;
343 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
344 	bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);
345 	nmp->nm_nam = nam;
346 
347 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
348 		nmp->nm_rto = argp->timeo;
349 		/* NFS timeouts are specified in 1/10 sec. */
350 		nmp->nm_rto = (nmp->nm_rto * 10) / NFS_HZ;
351 		if (nmp->nm_rto < NFS_MINTIMEO)
352 			nmp->nm_rto = NFS_MINTIMEO;
353 		else if (nmp->nm_rto > NFS_MAXTIMEO)
354 			nmp->nm_rto = NFS_MAXTIMEO;
355 		nmp->nm_rttvar = nmp->nm_rto << 1;
356 	}
357 
358 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
359 		nmp->nm_retry = argp->retrans;
360 		if (nmp->nm_retry > NFS_MAXREXMIT)
361 			nmp->nm_retry = NFS_MAXREXMIT;
362 	}
363 
364 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
365 		nmp->nm_wsize = argp->wsize;
366 		/* Round down to multiple of blocksize */
367 		nmp->nm_wsize &= ~0x1ff;
368 		if (nmp->nm_wsize <= 0)
369 			nmp->nm_wsize = 512;
370 		else if (nmp->nm_wsize > NFS_MAXDATA)
371 			nmp->nm_wsize = NFS_MAXDATA;
372 	}
373 	if (nmp->nm_wsize > MAXBSIZE)
374 		nmp->nm_wsize = MAXBSIZE;
375 
376 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
377 		nmp->nm_rsize = argp->rsize;
378 		/* Round down to multiple of blocksize */
379 		nmp->nm_rsize &= ~0x1ff;
380 		if (nmp->nm_rsize <= 0)
381 			nmp->nm_rsize = 512;
382 		else if (nmp->nm_rsize > NFS_MAXDATA)
383 			nmp->nm_rsize = NFS_MAXDATA;
384 	}
385 	if (nmp->nm_rsize > MAXBSIZE)
386 		nmp->nm_rsize = MAXBSIZE;
387 	/* Set up the sockets and per-host congestion */
388 	nmp->nm_sotype = argp->sotype;
389 	nmp->nm_soproto = argp->proto;
390 	if (error = nfs_connect(nmp))
391 		goto bad;
392 
393 	if (error = nfs_statfs(mp, &mp->mnt_stat, p))
394 		goto bad;
395 	/*
396 	 * A reference count is needed on the nfsnode representing the
397 	 * remote root.  If this object is not persistent, then backward
398 	 * traversals of the mount point (i.e. "..") will not work if
399 	 * the nfsnode gets flushed out of the cache. Ufs does not have
400 	 * this problem, because one can identify root inodes by their
401 	 * number == ROOTINO (2).
402 	 */
403 	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
404 		goto bad;
405 	/*
406 	 * Unlock it, but keep the reference count.
407 	 */
408 	nfs_unlock(NFSTOV(np));
409 	*vpp = NFSTOV(np);
410 
411 	return (0);
412 bad:
413 	nfs_disconnect(nmp);
414 	FREE(nmp, M_NFSMNT);
415 	m_freem(nam);
416 	return (error);
417 }
418 
419 /*
420  * unmount system call
421  */
422 nfs_unmount(mp, mntflags, p)
423 	struct mount *mp;
424 	int mntflags;
425 	struct proc *p;
426 {
427 	register struct nfsmount *nmp;
428 	struct nfsnode *np;
429 	struct vnode *vp;
430 	int error, flags = 0;
431 	extern int doforce;
432 
433 	if (mntflags & MNT_FORCE) {
434 		if (!doforce || mp == rootfs)
435 			return (EINVAL);
436 		flags |= FORCECLOSE;
437 	}
438 	nmp = VFSTONFS(mp);
439 	/*
440 	 * Clear out the buffer cache
441 	 */
442 	mntflushbuf(mp, 0);
443 	if (mntinvalbuf(mp))
444 		return (EBUSY);
445 	/*
446 	 * Goes something like this..
447 	 * - Check for activity on the root vnode (other than ourselves).
448 	 * - Call vflush() to clear out vnodes for this file system,
449 	 *   except for the root vnode.
450 	 * - Decrement reference on the vnode representing remote root.
451 	 * - Close the socket
452 	 * - Free up the data structures
453 	 */
454 	/*
455 	 * We need to decrement the ref. count on the nfsnode representing
456 	 * the remote root.  See comment in mountnfs().  The VFS unmount()
457 	 * has done vput on this vnode, otherwise we would get deadlock!
458 	 */
459 	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
460 		return(error);
461 	vp = NFSTOV(np);
462 	if (vp->v_usecount > 2) {
463 		vput(vp);
464 		return (EBUSY);
465 	}
466 	if (error = vflush(mp, vp, flags)) {
467 		vput(vp);
468 		return (error);
469 	}
470 	/*
471 	 * Get rid of two reference counts, and unlock it on the second.
472 	 */
473 	vrele(vp);
474 	vput(vp);
475 	nfs_disconnect(nmp);
476 	m_freem(nmp->nm_nam);
477 	free((caddr_t)nmp, M_NFSMNT);
478 	return (0);
479 }
480 
481 /*
482  * Return root of a filesystem
483  */
484 nfs_root(mp, vpp)
485 	struct mount *mp;
486 	struct vnode **vpp;
487 {
488 	register struct vnode *vp;
489 	struct nfsmount *nmp;
490 	struct nfsnode *np;
491 	int error;
492 
493 	nmp = VFSTONFS(mp);
494 	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
495 		return (error);
496 	vp = NFSTOV(np);
497 	vp->v_type = VDIR;
498 	vp->v_flag = VROOT;
499 	*vpp = vp;
500 	return (0);
501 }
502 
503 extern int syncprt;
504 
505 /*
506  * Flush out the buffer cache
507  */
508 /* ARGSUSED */
509 nfs_sync(mp, waitfor)
510 	struct mount *mp;
511 	int waitfor;
512 {
513 	if (syncprt)
514 		bufstats();
515 	/*
516 	 * Force stale buffer cache information to be flushed.
517 	 */
518 	mntflushbuf(mp, waitfor == MNT_WAIT ? B_SYNC : 0);
519 	return (0);
520 }
521 
/*
 * File handle to vnode pointer.
 * At this point, this should never happen, so every call is refused.
 *
 *	mp, fhp, vpp	all ignored
 *
 * Always returns EINVAL; *vpp is left untouched.
 */
/* ARGSUSED */
nfs_fhtovp(mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{
	return (EINVAL);
}
534 
/*
 * Vnode pointer to File handle, should never happen either
 *
 *	vp, fhp		both ignored
 *
 * Always returns EINVAL; *fhp is left untouched.
 */
/* ARGSUSED */
nfs_vptofh(vp, fhp)
	struct vnode *vp;
	struct fid *fhp;
{
	return (EINVAL);
}
546 
/*
 * Vfs start routine, a no-op.
 *
 *	mp, flags, p	all ignored
 *
 * Always returns 0.
 */
/* ARGSUSED */
nfs_start(mp, flags, p)
	struct mount *mp;
	int flags;
	struct proc *p;
{
	return (0);
}
559 
560 /*
561  * Do operations associated with quotas, not supported
562  */
563 nfs_quotactl(mp, cmd, uid, arg, p)
564 	struct mount *mp;
565 	int cmd;
566 	uid_t uid;
567 	caddr_t arg;
568 	struct proc *p;
569 {
570 #ifdef lint
571 	mp = mp; cmd = cmd; uid = uid; arg = arg;
572 #endif /* lint */
573 	return (EOPNOTSUPP);
574 }
575