xref: /dragonfly/sys/vfs/mfs/mfs_vfsops.c (revision d50f9ae3)
1 /*
2  * Copyright (c) 1989, 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)mfs_vfsops.c	8.11 (Berkeley) 6/19/95
30  * $FreeBSD: src/sys/ufs/mfs/mfs_vfsops.c,v 1.81.2.3 2001/07/04 17:35:21 tegge Exp $
31  */
32 
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/conf.h>
37 #include <sys/device.h>
38 #include <sys/kernel.h>
39 #include <sys/proc.h>
40 #include <sys/buf.h>
41 #include <sys/mount.h>
42 #include <sys/signalvar.h>
43 #include <sys/signal2.h>
44 #include <sys/spinlock2.h>
45 #include <sys/vnode.h>
46 #include <sys/malloc.h>
47 #include <sys/sysmsg.h>
48 #include <sys/mman.h>
49 #include <sys/linker.h>
50 #include <sys/fcntl.h>
51 #include <sys/nlookup.h>
52 #include <sys/devfs.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_pager.h>
58 #include <vm/vnode_pager.h>
59 #include <vm/vm_extern.h>
60 
61 #include <sys/buf2.h>
62 #include <sys/thread2.h>
63 
64 #include <vfs/ufs/quota.h>
65 #include <vfs/ufs/inode.h>
66 #include <vfs/ufs/ufsmount.h>
67 #include <vfs/ufs/ufs_extern.h>
68 #include <vfs/ufs/fs.h>
69 #include <vfs/ufs/ffs_extern.h>
70 
71 #include "mfsnode.h"
72 #include "mfs_extern.h"
73 
74 MALLOC_DEFINE(M_MFSNODE, "MFS node", "MFS vnode private part");
75 
76 static int	mfs_mount (struct mount *mp,
77 			char *path, caddr_t data, struct ucred *td);
78 static int	mfs_start (struct mount *mp, int flags);
79 static int	mfs_statfs (struct mount *mp, struct statfs *sbp,
80 			struct ucred *cred);
81 static int	mfs_init (struct vfsconf *);
82 static void	mfs_doio(struct bio *bio, struct mfsnode *mfsp);
83 
84 d_open_t	mfsopen;
85 d_close_t	mfsclose;
86 d_strategy_t	mfsstrategy;
87 
88 static struct dev_ops mfs_ops = {
89 	{ "MFS", -1, D_DISK | D_NOEMERGPGR },
90 	.d_open =	mfsopen,
91 	.d_close =	mfsclose,
92 	.d_read =	physread,
93 	.d_write =	physwrite,
94 	.d_strategy =	mfsstrategy,
95 };
96 
97 /*
98  * mfs vfs operations.
99  */
100 static struct vfsops mfs_vfsops = {
101 	.vfs_flags =		0,
102 	.vfs_mount =     	mfs_mount,
103 	.vfs_start =    	mfs_start,
104 	.vfs_unmount =   	ffs_unmount,
105 	.vfs_root =     	ufs_root,
106 	.vfs_quotactl =  	ufs_quotactl,
107 	.vfs_statfs =   	mfs_statfs,
108 	.vfs_sync =     	ffs_sync,
109 	.vfs_vget =      	ffs_vget,
110 	.vfs_fhtovp =   	ffs_fhtovp,
111 	.vfs_checkexp =  	ufs_check_export,
112 	.vfs_vptofh =   	ffs_vptofh,
113 	.vfs_init =     	mfs_init
114 };
115 
116 VFS_SET(mfs_vfsops, mfs, 0);
117 MODULE_VERSION(mfs, 1);
118 
119 /*
120  * We allow the underlying MFS block device to be opened and read.
121  */
122 int
123 mfsopen(struct dev_open_args *ap)
124 {
125 	cdev_t dev = ap->a_head.a_dev;
126 
127 #if 0
128 	if (ap->a_oflags & FWRITE)
129 		return(EROFS);
130 #endif
131 	if (dev->si_drv1)
132 		return(0);
133 	return(ENXIO);
134 }
135 
136 int
137 mfsclose(struct dev_close_args *ap)
138 {
139 	cdev_t dev = ap->a_head.a_dev;
140 	struct mfsnode *mfsp;
141 
142 	if ((mfsp = dev->si_drv1) == NULL)
143 		return(0);
144         mfsp->mfs_active = 0;
145         wakeup((caddr_t)mfsp);
146 	return(0);
147 }
148 
149 int
150 mfsstrategy(struct dev_strategy_args *ap)
151 {
152 	cdev_t dev = ap->a_head.a_dev;
153 	struct bio *bio = ap->a_bio;
154 	struct buf *bp = bio->bio_buf;
155 	off_t boff = bio->bio_offset;
156 	off_t eoff = boff + bp->b_bcount;
157 	struct mfsnode *mfsp;
158 
159 	if ((mfsp = dev->si_drv1) == NULL) {
160 		bp->b_error = ENXIO;
161 		goto error;
162 	}
163 	if (boff < 0)
164 		goto bad;
165 	if (eoff > mfsp->mfs_size) {
166 		if (boff > mfsp->mfs_size || (bp->b_flags & B_BNOCLIP))
167 			goto bad;
168 		/*
169 		 * Return EOF by completing the I/O with 0 bytes transfered.
170 		 * Set B_INVAL to indicate that any data in the buffer is not
171 		 * valid.
172 		 */
173 		if (boff == mfsp->mfs_size) {
174 			bp->b_resid = bp->b_bcount;
175 			bp->b_flags |= B_INVAL;
176 			goto done;
177 		}
178 		bp->b_bcount = mfsp->mfs_size - boff;
179 	}
180 
181 	/*
182 	 * Initiate I/O
183 	 */
184 	if (mfsp->mfs_td == curthread) {
185 		mfs_doio(bio, mfsp);
186 	} else {
187 		bioq_insert_tail(&mfsp->bio_queue, bio);
188 		wakeup((caddr_t)mfsp);
189 	}
190 	return(0);
191 
192 	/*
193 	 * Failure conditions on bio
194 	 */
195 bad:
196 	bp->b_error = EINVAL;
197 error:
198 	bp->b_flags |= B_ERROR | B_INVAL;
199 done:
200 	biodone(bio);
201 	return(0);
202 }
203 
204 /*
205  * mfs_mount
206  *
207  * Called when mounting local physical media
208  *
209  * PARAMETERS:
210  *		mountroot
211  *			mp	mount point structure
212  *			path	NULL (flag for root mount!!!)
213  *			data	<unused>
214  *			ndp	<unused>
215  *			p	process (user credentials check [statfs])
216  *
217  *		mount
218  *			mp	mount point structure
219  *			path	path to mount point
220  *			data	pointer to argument struct in user space
221  *			ndp	mount point namei() return (used for
222  *				credentials on reload), reused to look
223  *				up block device.
224  *			p	process (user credentials check)
225  *
226  * RETURNS:	0	Success
227  *		!0	error number (errno.h)
228  *
229  * LOCK STATE:
230  *
231  *		ENTRY
232  *			mount point is locked
233  *		EXIT
234  *			mount point is locked
235  *
236  * NOTES:
237  *		A NULL path can be used for a flag since the mount
238  *		system call will fail with EFAULT in copyinstr in
239  *		namei() if it is a genuine NULL from the user.
240  */
241 /* ARGSUSED */
242 static int
243 mfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
244 {
245 	struct vnode *devvp;
246 	struct mfs_args args;
247 	struct ufsmount *ump;
248 	struct fs *fs;
249 	struct mfsnode *mfsp;
250 	struct nlookupdata nd;
251 	size_t size;
252 	char devname[16];
253 	int flags;
254 	int minnum;
255 	int error;
256 	cdev_t dev;
257 
258 	/*
259 	 * Use NULL path to flag a root mount
260 	 */
261 	if (path == NULL) {
262 		/*
263 		 ***
264 		 * Mounting root file system
265 		 ***
266 		 */
267 
268 		/* you lose */
269 		panic("mfs_mount: mount MFS as root: not configured!");
270 	}
271 
272 	mfsp = NULL;
273 
274 	/*
275 	 ***
276 	 * Mounting non-root file system or updating a file system
277 	 ***
278 	 */
279 
280 	/* copy in user arguments*/
281 	error = copyin(data, (caddr_t)&args, sizeof (struct mfs_args));
282 	if (error)
283 		goto error_1;
284 
285 	/*
286 	 * If updating, check whether changing from read-only to
287 	 * read/write; if there is no device name, that's all we do.
288 	 */
289 	if (mp->mnt_flag & MNT_UPDATE) {
290 		/*
291 		 ********************
292 		 * UPDATE
293 		 ********************
294 		 */
295 		ump = VFSTOUFS(mp);
296 		fs = ump->um_fs;
297 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
298 			flags = WRITECLOSE;
299 			if (mp->mnt_flag & MNT_FORCE)
300 				flags |= FORCECLOSE;
301 			error = ffs_flushfiles(mp, flags);
302 			if (error)
303 				goto error_1;
304 		}
305 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
306 			/* XXX reopen the device vnode read-write */
307 			fs->fs_ronly = 0;
308 		}
309 		/* if not updating name...*/
310 		if (args.fspec == 0) {
311 			/*
312 			 * Process export requests.  Jumping to "success"
313 			 * will return the vfs_export() error code.
314 			 */
315 			error = vfs_export(mp, &ump->um_export, &args.export);
316 			goto success;
317 		}
318 
319 		/* XXX MFS does not support name updating*/
320 		goto success;
321 	}
322 
323 	/*
324 	 * Do the MALLOC before the make_dev since doing so afterward
325 	 * might cause a bogus v_data pointer to get dereferenced
326 	 * elsewhere if MALLOC should block.
327 	 */
328 	mfsp = kmalloc(sizeof *mfsp, M_MFSNODE, M_WAITOK | M_ZERO);
329 
330 	minnum = (int)curproc->p_pid;
331 
332 	dev = make_dev(&mfs_ops, minnum, UID_ROOT, GID_WHEEL, 0600,
333 		       "mfs%d", minnum);
334 	/* It is not clear that these will get initialized otherwise */
335 	dev->si_bsize_phys = DEV_BSIZE;
336 	dev->si_iosize_max = MAXPHYS;
337 	dev->si_drv1 = mfsp;
338 	mfsp->mfs_baseoff = args.base;
339 	mfsp->mfs_size = args.size;
340 	mfsp->mfs_dev = dev;
341 	mfsp->mfs_td = curthread;
342 	mfsp->mfs_active = 1;
343 	bioq_init(&mfsp->bio_queue);
344 
345 	devfs_config();	/* sync devfs work */
346 	ksnprintf(devname, sizeof(devname), "/dev/mfs%d", minnum);
347 	nlookup_init(&nd, devname, UIO_SYSSPACE, 0);
348 	devvp = NULL;
349 	error = nlookup(&nd);
350 	if (error == 0) {
351 		devvp = nd.nl_nch.ncp->nc_vp;
352 		if (devvp == NULL)
353 			error = ENOENT;
354 		error = vget(devvp, LK_SHARED);
355 	}
356 	nlookup_done(&nd);
357 
358 	if (error)
359 		goto error_1;
360 	vn_unlock(devvp);
361 
362 	/*
363 	 * Our 'block' device must be backed by a VM object.  Theoretically
364 	 * we could use the anonymous memory VM object supplied by userland,
365 	 * but it would be somewhat of a complex task to deal with it
366 	 * that way since it would result in I/O requests which supply
367 	 * the VM pages from our own object.
368 	 *
369 	 * vnode_pager_alloc() is typically called when a VM object is
370 	 * being referenced externally.  We have to undo the refs for
371 	 * the self reference between vnode and object.
372 	 */
373 	vnode_pager_setsize(devvp, args.size);
374 
375 	/* Save "mounted from" info for mount point (NULL pad)*/
376 	copyinstr(args.fspec,			/* device name*/
377 		  mp->mnt_stat.f_mntfromname,	/* save area*/
378 		  MNAMELEN - 1,			/* max size*/
379 		  &size);			/* real size*/
380 	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
381 	/* vref is eaten by mount? */
382 
383 	error = ffs_mountfs(devvp, mp, M_MFSNODE);
384 	if (error) {
385 		mfsp->mfs_active = 0;
386 		goto error_2;
387 	}
388 
389 	/*
390 	 * Initialize FS stat information in mount struct; uses
391 	 * mp->mnt_stat.f_mntfromname.
392 	 *
393 	 * This code is common to root and non-root mounts
394 	 */
395 	VFS_STATFS(mp, &mp->mnt_stat, cred);
396 
397 	/*
398 	 * Mark VFS_START MPSAFE; this is to avoid accessing
399 	 * per-mount token after VFS_START exits
400 	 */
401 	mp->mnt_kern_flag |= MNTK_ST_MPSAFE;
402 
403 	goto success;
404 
405 error_2:	/* error with devvp held*/
406 	vrele(devvp);
407 
408 error_1:	/* no state to back out*/
409 	if (mfsp) {
410 		if (mfsp->mfs_dev) {
411 			destroy_dev(mfsp->mfs_dev);
412 			mfsp->mfs_dev = NULL;
413 		}
414 		kfree(mfsp, M_MFSNODE);
415 	}
416 
417 success:
418 	return(error);
419 }
420 
421 /*
422  * Used to grab the process and keep it in the kernel to service
423  * memory filesystem I/O requests.
424  *
425  * Loop servicing I/O requests.
426  * Copy the requested data into or out of the memory filesystem
427  * address space.
428  */
429 /* ARGSUSED */
430 static int
431 mfs_start(struct mount *mp, int flags)
432 {
433 	struct vnode *vp = VFSTOUFS(mp)->um_devvp;
434 	struct mfsnode *mfsp = vp->v_rdev->si_drv1;
435 	struct bio *bio;
436 	struct buf *bp;
437 	int gotsig = 0, sig;
438 	thread_t td = curthread;
439 
440 	/*
441 	 * We must prevent the system from trying to swap
442 	 * out or kill ( when swap space is low, see vm/pageout.c ) the
443 	 * process.  A deadlock can occur if the process is swapped out,
444 	 * and the system can loop trying to kill the unkillable ( while
445 	 * references exist ) MFS process when swap space is low.
446 	 */
447 	KKASSERT(curproc);
448 	PHOLD(curproc);
449 
450 	mfsp->mfs_td = td;
451 
452 	while (mfsp->mfs_active) {
453 		crit_enter();
454 
455 		while ((bio = bioq_takefirst(&mfsp->bio_queue)) != NULL) {
456 			crit_exit();
457 			bp = bio->bio_buf;
458 			mfs_doio(bio, mfsp);
459 			wakeup(bp);
460 			crit_enter();
461 		}
462 
463 		crit_exit();
464 
465 		/*
466 		 * If a non-ignored signal is received, try to unmount.
467 		 * If that fails, clear the signal (it has been "processed"),
468 		 * otherwise we will loop here, as tsleep will always return
469 		 * EINTR/ERESTART.
470 		 */
471 		/*
472 		 * Note that dounmount() may fail if work was queued after
473 		 * we slept. We have to jump hoops here to make sure that we
474 		 * process any buffers after the sleep, before we dounmount()
475 		 */
476 		if (gotsig) {
477 			gotsig = 0;
478 			if (dounmount(mp, 0, 0) != 0) {
479 				KKASSERT(td->td_proc);
480 				lwkt_gettoken(&td->td_proc->p_token);
481 				sig = CURSIG(td->td_lwp);
482 				if (sig) {
483 					spin_lock(&td->td_lwp->lwp_spin);
484 					lwp_delsig(td->td_lwp, sig, 1);
485 					spin_unlock(&td->td_lwp->lwp_spin);
486 				}
487 				lwkt_reltoken(&td->td_proc->p_token);
488 			}
489 		}
490 		else if (tsleep((caddr_t)mfsp, PCATCH, "mfsidl", 0))
491 			gotsig++;	/* try to unmount in next pass */
492 	}
493 	PRELE(curproc);
494         if (mfsp->mfs_dev) {
495                 destroy_dev(mfsp->mfs_dev);
496                 mfsp->mfs_dev = NULL;
497         }
498 	kfree(mfsp, M_MFSNODE);
499 	return (EMOUNTEXIT);
500 }
501 
502 /*
503  * Get file system statistics.
504  */
505 static int
506 mfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
507 {
508 	int error;
509 
510 	error = ffs_statfs(mp, sbp, cred);
511 	sbp->f_type = mp->mnt_vfc->vfc_typenum;
512 	return (error);
513 }
514 
/*
 * Memory based filesystem initialization.
 *
 * There is nothing to set up; the vfsconf argument is unused and the
 * function always reports success.
 */
static int
mfs_init(struct vfsconf *vfsp)
{
	(void)vfsp;

	return (0);
}
523 
524 /*
525  * Memory file system I/O.
526  *
527  * Trivial on the HP since buffer has already been mapping into KVA space.
528  *
529  * Read and Write are handled with a simple copyin and copyout.
530  *
531  * We also partially support VOP_FREEBLKS().  We can't implement
532  * completely -- for example, on fragments or inode metadata, but we can
533  * implement it for page-aligned requests.
534  */
535 static void
536 mfs_doio(struct bio *bio, struct mfsnode *mfsp)
537 {
538 	struct buf *bp = bio->bio_buf;
539 	caddr_t base = mfsp->mfs_baseoff + bio->bio_offset;
540 	int bytes;
541 
542 	switch(bp->b_cmd) {
543 	case BUF_CMD_FREEBLKS:
544 		/*
545 		 * Implement FREEBLKS, which allows the filesystem to tell
546 		 * a block device when blocks are no longer needed (like when
547 		 * a file is deleted).  We use the hook to MADV_FREE the VM.
548 		 * This makes an MFS filesystem work as well or better then
549 		 * a sun-style swap-mounted filesystem.
550 		 */
551 		bytes = bp->b_bcount;
552 
553 		if ((vm_offset_t)base & PAGE_MASK) {
554 			int n = PAGE_SIZE - ((vm_offset_t)base & PAGE_MASK);
555 			bytes -= n;
556 			base += n;
557 		}
558                 if (bytes > 0) {
559                         struct madvise_args uap;
560 
561 			bytes &= ~PAGE_MASK;
562 			if (bytes != 0) {
563 				struct sysmsg sysmsg;
564 
565 				bzero(&sysmsg, sizeof(sysmsg));
566 				bzero(&uap, sizeof(uap));
567 				uap.addr  = base;
568 				uap.len   = bytes;
569 				uap.behav = MADV_FREE;
570 				sys_madvise(&sysmsg, &uap);
571 			}
572                 }
573 		bp->b_error = 0;
574 		break;
575 	case BUF_CMD_READ:
576 		/*
577 		 * Read data from our 'memory' disk
578 		 */
579 		bp->b_error = copyin(base, bp->b_data, bp->b_bcount);
580 		break;
581 	case BUF_CMD_WRITE:
582 		/*
583 		 * Write data to our 'memory' disk
584 		 */
585 		bp->b_error = copyout(bp->b_data, base, bp->b_bcount);
586 		break;
587 	default:
588 		panic("mfs: bad b_cmd %d", bp->b_cmd);
589 	}
590 	if (bp->b_error)
591 		bp->b_flags |= B_ERROR;
592 	biodone(bio);
593 }
594