xref: /original-bsd/sys/nfs/nfs_vfsops.c (revision ba762ddc)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * %sccs.include.redist.c%
9  *
10  *	@(#)nfs_vfsops.c	7.29 (Berkeley) 04/19/91
11  */
12 
13 #include "param.h"
14 #include "conf.h"
15 #include "ioctl.h"
16 #include "signal.h"
17 #include "proc.h"
18 #include "namei.h"
19 #include "vnode.h"
20 #include "mount.h"
21 #include "buf.h"
22 #include "mbuf.h"
23 #include "socket.h"
24 #include "systm.h"
25 
26 #include "../net/if.h"
27 #include "../net/route.h"
28 #include "../netinet/in.h"
29 
30 #include "nfsv2.h"
31 #include "nfsnode.h"
32 #include "nfsmount.h"
33 #include "nfs.h"
34 #include "xdr_subs.h"
35 #include "nfsm_subs.h"
36 #include "nfsdiskless.h"
37 
38 /*
39  * nfs vfs operations.
40  */
41 struct vfsops nfs_vfsops = {
42 	nfs_mount,
43 	nfs_start,
44 	nfs_unmount,
45 	nfs_root,
46 	nfs_quotactl,
47 	nfs_statfs,
48 	nfs_sync,
49 	nfs_fhtovp,
50 	nfs_vptofh,
51 	nfs_init,
52 };
53 
54 static u_char nfs_mntid;
55 extern u_long nfs_procids[NFS_NPROCS];
56 extern u_long nfs_prog, nfs_vers;
57 struct nfs_diskless nfs_diskless;
58 void nfs_disconnect();
59 
60 #define TRUE	1
61 #define	FALSE	0
62 
63 /*
64  * nfs statfs call
65  */
66 nfs_statfs(mp, sbp, p)
67 	struct mount *mp;
68 	register struct statfs *sbp;
69 	struct proc *p;
70 {
71 	register struct vnode *vp;
72 	register struct nfsv2_statfs *sfp;
73 	register caddr_t cp;
74 	register long t1;
75 	caddr_t bpos, dpos, cp2;
76 	u_long xid;
77 	int error = 0;
78 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
79 	struct nfsmount *nmp;
80 	struct ucred *cred;
81 	struct nfsnode *np;
82 
83 	nmp = VFSTONFS(mp);
84 	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
85 		return (error);
86 	vp = NFSTOV(np);
87 	nfsstats.rpccnt[NFSPROC_STATFS]++;
88 	cred = crget();
89 	cred->cr_ngroups = 1;
90 	nfsm_reqhead(nfs_procids[NFSPROC_STATFS], cred, NFSX_FH);
91 	nfsm_fhtom(vp);
92 	nfsm_request(vp, NFSPROC_STATFS, p, 0);
93 	nfsm_disect(sfp, struct nfsv2_statfs *, NFSX_STATFS);
94 	sbp->f_type = MOUNT_NFS;
95 	sbp->f_flags = nmp->nm_flag;
96 	sbp->f_bsize = fxdr_unsigned(long, sfp->sf_tsize);
97 	sbp->f_fsize = fxdr_unsigned(long, sfp->sf_bsize);
98 	sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks);
99 	sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree);
100 	sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail);
101 	sbp->f_files = 0;
102 	sbp->f_ffree = 0;
103 	if (sbp != &mp->mnt_stat) {
104 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
105 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
106 	}
107 	nfsm_reqdone;
108 	nfs_nput(vp);
109 	crfree(cred);
110 	return (error);
111 }
112 
113 /*
114  * Mount a remote root fs via. nfs. This depends on the info in the
115  * nfs_diskless structure that has been filled in properly by some primary
116  * bootstrap.
117  * It goes something like this:
118  * - do enough of "ifconfig" by calling ifioctl() so that the system
119  *   can talk to the server
120  * - If nfs_diskless.mygateway is filled in, use that address as
121  *   a default gateway.
122  *   (This is done the 4.3 way with rtioctl() and should be changed)
123  * - hand craft the swap nfs vnode hanging off a fake mount point
124  * - build the rootfs mount point and call mountnfs() to do the rest.
125  */
126 nfs_mountroot()
127 {
128 	register struct mount *mp;
129 	register struct mbuf *m;
130 	struct socket *so;
131 	struct vnode *vp;
132 	int error;
133 
134 	/*
135 	 * Do enough of ifconfig(8) so that critical net interface can
136 	 * talk to the server.
137 	 */
138 	if (socreate(nfs_diskless.myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0))
139 		panic("nfs ifconf");
140 	if (ifioctl(so, SIOCAIFADDR, &nfs_diskless.myif))
141 		panic("nfs ifconf2");
142 	soclose(so);
143 
144 	/*
145 	 * If the gateway field is filled in, set it as the default route.
146 	 */
147 #ifdef COMPAT_43
148 	if (nfs_diskless.mygateway.sa_family == AF_INET) {
149 		struct ortentry rt;
150 		struct sockaddr_in *sin;
151 
152 		sin = (struct sockaddr_in *) &rt.rt_dst;
153 		sin->sin_len = sizeof (struct sockaddr_in);
154 		sin->sin_family = AF_INET;
155 		sin->sin_addr.s_addr = 0;	/* default */
156 		bcopy((caddr_t)&nfs_diskless.mygateway, (caddr_t)&rt.rt_gateway,
157 			sizeof (struct sockaddr_in));
158 		rt.rt_flags = (RTF_UP | RTF_GATEWAY);
159 		if (rtioctl(SIOCADDRT, (caddr_t)&rt))
160 			panic("nfs root route");
161 	}
162 #endif	/* COMPAT_43 */
163 
164 	/*
165 	 * If swapping to an nfs node (indicated by swdevt[0].sw_dev == NODEV):
166 	 * Create a fake mount point just for the swap vnode so that the
167 	 * swap file can be on a different server from the rootfs.
168 	 */
169 	if (swdevt[0].sw_dev == NODEV) {
170 		mp = (struct mount *)malloc((u_long)sizeof(struct mount),
171 			M_MOUNT, M_NOWAIT);
172 		if (mp == NULL)
173 			panic("nfs root mount");
174 		mp->mnt_op = &nfs_vfsops;
175 		mp->mnt_flag = 0;
176 		mp->mnt_exroot = 0;
177 		mp->mnt_mounth = NULLVP;
178 
179 		/*
180 		 * Set up the diskless nfs_args for the swap mount point
181 		 * and then call mountnfs() to mount it.
182 		 * Since the swap file is not the root dir of a file system,
183 		 * hack it to a regular file.
184 		 */
185 		nfs_diskless.swap_args.fh = (nfsv2fh_t *)nfs_diskless.swap_fh;
186 		MGET(m, MT_SONAME, M_DONTWAIT);
187 		if (m == NULL)
188 			panic("nfs root mbuf");
189 		bcopy((caddr_t)&nfs_diskless.swap_saddr, mtod(m, caddr_t),
190 			nfs_diskless.swap_saddr.sa_len);
191 		m->m_len = nfs_diskless.swap_saddr.sa_len;
192 		if (mountnfs(&nfs_diskless.swap_args, mp, m, "/swap",
193 			nfs_diskless.swap_hostnam, &vp))
194 			panic("nfs swap");
195 		vp->v_type = VREG;
196 		vp->v_flag = 0;
197 		swapdev_vp = vp;
198 		VREF(vp);
199 		swdevt[0].sw_vp = vp;
200 		VREF(vp);
201 		argdev_vp = vp;
202 	}
203 
204 	/*
205 	 * Create the rootfs mount point.
206 	 */
207 	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
208 		M_MOUNT, M_NOWAIT);
209 	if (mp == NULL)
210 		panic("nfs root mount2");
211 	mp->mnt_op = &nfs_vfsops;
212 	mp->mnt_flag = MNT_RDONLY;
213 	mp->mnt_exroot = 0;
214 	mp->mnt_mounth = NULLVP;
215 
216 	/*
217 	 * Set up the root fs args and call mountnfs() to do the rest.
218 	 */
219 	nfs_diskless.root_args.fh = (nfsv2fh_t *)nfs_diskless.root_fh;
220 	MGET(m, MT_SONAME, M_DONTWAIT);
221 	if (m == NULL)
222 		panic("nfs root mbuf2");
223 	bcopy((caddr_t)&nfs_diskless.root_saddr, mtod(m, caddr_t),
224 		nfs_diskless.root_saddr.sa_len);
225 	m->m_len = nfs_diskless.root_saddr.sa_len;
226 	if (mountnfs(&nfs_diskless.root_args, mp, m, "/",
227 		nfs_diskless.root_hostnam, &vp))
228 		panic("nfs root");
229 	if (vfs_lock(mp))
230 		panic("nfs root2");
231 	rootfs = mp;
232 	mp->mnt_next = mp;
233 	mp->mnt_prev = mp;
234 	mp->mnt_vnodecovered = NULLVP;
235 	vfs_unlock(mp);
236 	rootvp = vp;
237 	inittodr((time_t)0);	/* There is no time in the nfs fsstat so ?? */
238 	return (0);
239 }
240 
241 /*
242  * VFS Operations.
243  *
244  * mount system call
245  * It seems a bit dumb to copyinstr() the host and path here and then
246  * bcopy() them in mountnfs(), but I wanted to detect errors before
247  * doing the sockargs() call because sockargs() allocates an mbuf and
248  * an error after that means that I have to release the mbuf.
249  */
250 /* ARGSUSED */
251 nfs_mount(mp, path, data, ndp, p)
252 	struct mount *mp;
253 	char *path;
254 	caddr_t data;
255 	struct nameidata *ndp;
256 	struct proc *p;
257 {
258 	int error;
259 	struct nfs_args args;
260 	struct mbuf *nam;
261 	struct vnode *vp;
262 	char pth[MNAMELEN], hst[MNAMELEN];
263 	int len;
264 	nfsv2fh_t nfh;
265 
266 	if (mp->mnt_flag & MNT_UPDATE)
267 		return (0);
268 	if (error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)))
269 		return (error);
270 	if (error=copyin((caddr_t)args.fh, (caddr_t)&nfh, sizeof (nfsv2fh_t)))
271 		return (error);
272 	if (error = copyinstr(path, pth, MNAMELEN-1, &len))
273 		return (error);
274 	bzero(&pth[len], MNAMELEN-len);
275 	if (error = copyinstr(args.hostname, hst, MNAMELEN-1, &len))
276 		return (error);
277 	bzero(&hst[len], MNAMELEN-len);
278 	/* sockargs() call must be after above copyin() calls */
279 	if (error = sockargs(&nam, (caddr_t)args.addr,
280 		sizeof (struct sockaddr), MT_SONAME))
281 		return (error);
282 	args.fh = &nfh;
283 	error = mountnfs(&args, mp, nam, pth, hst, &vp);
284 	return (error);
285 }
286 
287 /*
288  * Common code for mount and mountroot
289  */
290 mountnfs(argp, mp, nam, pth, hst, vpp)
291 	register struct nfs_args *argp;
292 	register struct mount *mp;
293 	struct mbuf *nam;
294 	char *pth, *hst;
295 	struct vnode **vpp;
296 {
297 	register struct nfsmount *nmp;
298 	struct proc *p = curproc;		/* XXX */
299 	struct nfsnode *np;
300 	int error;
301 	fsid_t tfsid;
302 
303 	MALLOC(nmp, struct nfsmount *, sizeof *nmp, M_NFSMNT, M_WAITOK);
304 	bzero((caddr_t)nmp, sizeof *nmp);
305 	mp->mnt_data = (qaddr_t)nmp;
306 	/*
307 	 * Generate a unique nfs mount id. The problem is that a dev number
308 	 * is not unique across multiple systems. The techique is as follows:
309 	 * 1) Set to nblkdev,0 which will never be used otherwise
310 	 * 2) Generate a first guess as nblkdev,nfs_mntid where nfs_mntid is
311 	 *	NOT 0
312 	 * 3) Loop searching the mount list for another one with same id
313 	 *	If a match, increment val[0] and try again
314 	 * NB: I increment val[0] { a long } instead of nfs_mntid { a u_char }
315 	 *	so that nfs is not limited to 255 mount points
316 	 *     Incrementing the high order bits does no real harm, since it
317 	 *     simply makes the major dev number tick up. The upper bound is
318 	 *     set to major dev 127 to avoid any sign extention problems
319 	 */
320 	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev, 0);
321 	mp->mnt_stat.f_fsid.val[1] = MOUNT_NFS;
322 	if (++nfs_mntid == 0)
323 		++nfs_mntid;
324 	tfsid.val[0] = makedev(nblkdev, nfs_mntid);
325 	tfsid.val[1] = MOUNT_NFS;
326 	while (rootfs && getvfs(&tfsid)) {
327 		tfsid.val[0]++;
328 		nfs_mntid++;
329 	}
330 	if (major(tfsid.val[0]) > 127) {
331 		error = ENOENT;
332 		goto bad;
333 	}
334 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
335 	nmp->nm_mountp = mp;
336 	nmp->nm_flag = argp->flags;
337 	nmp->nm_rto = NFS_TIMEO;
338 	nmp->nm_rtt = -1;
339 	nmp->nm_rttvar = nmp->nm_rto << 1;
340 	nmp->nm_retry = NFS_RETRANS;
341 	nmp->nm_wsize = NFS_WSIZE;
342 	nmp->nm_rsize = NFS_RSIZE;
343 	bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t));
344 	mp->mnt_stat.f_type = MOUNT_NFS;
345 	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
346 	bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);
347 	nmp->nm_nam = nam;
348 
349 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
350 		nmp->nm_rto = argp->timeo;
351 		/* NFS timeouts are specified in 1/10 sec. */
352 		nmp->nm_rto = (nmp->nm_rto * 10) / NFS_HZ;
353 		if (nmp->nm_rto < NFS_MINTIMEO)
354 			nmp->nm_rto = NFS_MINTIMEO;
355 		else if (nmp->nm_rto > NFS_MAXTIMEO)
356 			nmp->nm_rto = NFS_MAXTIMEO;
357 		nmp->nm_rttvar = nmp->nm_rto << 1;
358 	}
359 
360 	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
361 		nmp->nm_retry = argp->retrans;
362 		if (nmp->nm_retry > NFS_MAXREXMIT)
363 			nmp->nm_retry = NFS_MAXREXMIT;
364 	}
365 
366 	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
367 		nmp->nm_wsize = argp->wsize;
368 		/* Round down to multiple of blocksize */
369 		nmp->nm_wsize &= ~0x1ff;
370 		if (nmp->nm_wsize <= 0)
371 			nmp->nm_wsize = 512;
372 		else if (nmp->nm_wsize > NFS_MAXDATA)
373 			nmp->nm_wsize = NFS_MAXDATA;
374 	}
375 	if (nmp->nm_wsize > MAXBSIZE)
376 		nmp->nm_wsize = MAXBSIZE;
377 
378 	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
379 		nmp->nm_rsize = argp->rsize;
380 		/* Round down to multiple of blocksize */
381 		nmp->nm_rsize &= ~0x1ff;
382 		if (nmp->nm_rsize <= 0)
383 			nmp->nm_rsize = 512;
384 		else if (nmp->nm_rsize > NFS_MAXDATA)
385 			nmp->nm_rsize = NFS_MAXDATA;
386 	}
387 	if (nmp->nm_rsize > MAXBSIZE)
388 		nmp->nm_rsize = MAXBSIZE;
389 	/* Set up the sockets and per-host congestion */
390 	nmp->nm_sotype = argp->sotype;
391 	nmp->nm_soproto = argp->proto;
392 	if (error = nfs_connect(nmp))
393 		goto bad;
394 
395 	if (error = nfs_statfs(mp, &mp->mnt_stat, p))
396 		goto bad;
397 	/*
398 	 * A reference count is needed on the nfsnode representing the
399 	 * remote root.  If this object is not persistent, then backward
400 	 * traversals of the mount point (i.e. "..") will not work if
401 	 * the nfsnode gets flushed out of the cache. Ufs does not have
402 	 * this problem, because one can identify root inodes by their
403 	 * number == ROOTINO (2).
404 	 */
405 	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
406 		goto bad;
407 	/*
408 	 * Unlock it, but keep the reference count.
409 	 */
410 	nfs_unlock(NFSTOV(np));
411 	*vpp = NFSTOV(np);
412 
413 	return (0);
414 bad:
415 	nfs_disconnect(nmp);
416 	FREE(nmp, M_NFSMNT);
417 	m_freem(nam);
418 	return (error);
419 }
420 
421 /*
422  * unmount system call
423  */
424 nfs_unmount(mp, mntflags, p)
425 	struct mount *mp;
426 	int mntflags;
427 	struct proc *p;
428 {
429 	register struct nfsmount *nmp;
430 	struct nfsnode *np;
431 	struct vnode *vp;
432 	int error, flags = 0;
433 	extern int doforce;
434 
435 	if (mntflags & MNT_FORCE) {
436 		if (!doforce || mp == rootfs)
437 			return (EINVAL);
438 		flags |= FORCECLOSE;
439 	}
440 	nmp = VFSTONFS(mp);
441 	/*
442 	 * Clear out the buffer cache
443 	 */
444 	mntflushbuf(mp, 0);
445 	if (mntinvalbuf(mp))
446 		return (EBUSY);
447 	/*
448 	 * Goes something like this..
449 	 * - Check for activity on the root vnode (other than ourselves).
450 	 * - Call vflush() to clear out vnodes for this file system,
451 	 *   except for the root vnode.
452 	 * - Decrement reference on the vnode representing remote root.
453 	 * - Close the socket
454 	 * - Free up the data structures
455 	 */
456 	/*
457 	 * We need to decrement the ref. count on the nfsnode representing
458 	 * the remote root.  See comment in mountnfs().  The VFS unmount()
459 	 * has done vput on this vnode, otherwise we would get deadlock!
460 	 */
461 	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
462 		return(error);
463 	vp = NFSTOV(np);
464 	if (vp->v_usecount > 2) {
465 		vput(vp);
466 		return (EBUSY);
467 	}
468 	if (error = vflush(mp, vp, flags)) {
469 		vput(vp);
470 		return (error);
471 	}
472 	/*
473 	 * Get rid of two reference counts, and unlock it on the second.
474 	 */
475 	vrele(vp);
476 	vput(vp);
477 	nfs_disconnect(nmp);
478 	m_freem(nmp->nm_nam);
479 	free((caddr_t)nmp, M_NFSMNT);
480 	return (0);
481 }
482 
483 /*
484  * Return root of a filesystem
485  */
486 nfs_root(mp, vpp)
487 	struct mount *mp;
488 	struct vnode **vpp;
489 {
490 	register struct vnode *vp;
491 	struct nfsmount *nmp;
492 	struct nfsnode *np;
493 	int error;
494 
495 	nmp = VFSTONFS(mp);
496 	if (error = nfs_nget(mp, &nmp->nm_fh, &np))
497 		return (error);
498 	vp = NFSTOV(np);
499 	vp->v_type = VDIR;
500 	vp->v_flag = VROOT;
501 	*vpp = vp;
502 	return (0);
503 }
504 
505 extern int syncprt;
506 
507 /*
508  * Flush out the buffer cache
509  */
510 /* ARGSUSED */
511 nfs_sync(mp, waitfor)
512 	struct mount *mp;
513 	int waitfor;
514 {
515 	if (syncprt)
516 		bufstats();
517 	/*
518 	 * Force stale buffer cache information to be flushed.
519 	 */
520 	mntflushbuf(mp, waitfor == MNT_WAIT ? B_SYNC : 0);
521 	return (0);
522 }
523 
/*
 * File handle to vnode pointer.
 * At this point, this should never happen; every call is rejected
 * with EINVAL and none of the arguments is examined.
 */
/* ARGSUSED */
nfs_fhtovp(mp, fhp, vpp)
	struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{
	return (EINVAL);
}
536 
/*
 * Vnode pointer to File handle.
 * Should never happen either; every call is rejected with EINVAL and
 * neither argument is examined.
 */
/* ARGSUSED */
nfs_vptofh(vp, fhp)
	struct vnode *vp;
	struct fid *fhp;
{
	return (EINVAL);
}
548 
/*
 * Vfs start routine, a no-op.
 * Ignores all of its arguments and returns 0.
 */
/* ARGSUSED */
nfs_start(mp, flags, p)
	struct mount *mp;
	int flags;
	struct proc *p;
{
	return (0);
}
561 
562 /*
563  * Do operations associated with quotas, not supported
564  */
565 nfs_quotactl(mp, cmd, uid, arg, p)
566 	struct mount *mp;
567 	int cmd;
568 	uid_t uid;
569 	caddr_t arg;
570 	struct proc *p;
571 {
572 #ifdef lint
573 	mp = mp; cmd = cmd; uid = uid; arg = arg;
574 #endif /* lint */
575 	return (EOPNOTSUPP);
576 }
577