xref: /netbsd/sys/ufs/ext2fs/ext2fs_vfsops.c (revision bf9ec67e)
1 /*	$NetBSD: ext2fs_vfsops.c,v 1.49 2002/03/08 20:48:45 thorpej Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Manuel Bouyer.
5  * Copyright (c) 1989, 1991, 1993, 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)ffs_vfsops.c	8.14 (Berkeley) 11/28/94
37  * Modified for ext2fs by Manuel Bouyer.
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: ext2fs_vfsops.c,v 1.49 2002/03/08 20:48:45 thorpej Exp $");
42 
43 #if defined(_KERNEL_OPT)
44 #include "opt_compat_netbsd.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/namei.h>
50 #include <sys/proc.h>
51 #include <sys/kernel.h>
52 #include <sys/vnode.h>
53 #include <sys/socket.h>
54 #include <sys/mount.h>
55 #include <sys/buf.h>
56 #include <sys/device.h>
57 #include <sys/mbuf.h>
58 #include <sys/file.h>
59 #include <sys/disklabel.h>
60 #include <sys/ioctl.h>
61 #include <sys/errno.h>
62 #include <sys/malloc.h>
63 #include <sys/pool.h>
64 #include <sys/lock.h>
65 
66 #include <miscfs/specfs/specdev.h>
67 
68 #include <ufs/ufs/quota.h>
69 #include <ufs/ufs/ufsmount.h>
70 #include <ufs/ufs/inode.h>
71 #include <ufs/ufs/dir.h>
72 #include <ufs/ufs/ufs_extern.h>
73 
74 #include <ufs/ext2fs/ext2fs.h>
75 #include <ufs/ext2fs/ext2fs_extern.h>
76 
77 extern struct lock ufs_hashlock;
78 
79 int ext2fs_sbupdate __P((struct ufsmount *, int));
80 static int ext2fs_checksb __P((struct ext2fs *, int));
81 
82 extern const struct vnodeopv_desc ext2fs_vnodeop_opv_desc;
83 extern const struct vnodeopv_desc ext2fs_specop_opv_desc;
84 extern const struct vnodeopv_desc ext2fs_fifoop_opv_desc;
85 
/*
 * NULL-terminated list of the vnode operation vector descriptors used by
 * ext2fs (the vnodeop, specop and fifoop descriptors declared above).
 * The list is handed to the VFS layer through ext2fs_vfsops.
 */
const struct vnodeopv_desc * const ext2fs_vnodeopv_descs[] = {
	&ext2fs_vnodeop_opv_desc,
	&ext2fs_specop_opv_desc,
	&ext2fs_fifoop_opv_desc,
	NULL,
};
92 
/*
 * VFS operations table for ext2fs.
 *
 * The initializer is positional, so the order of the entries must match
 * the member order of struct vfsops (declared outside this file).
 * Entries named ufs_* are routines shared with the generic UFS code;
 * the ext2fs_* entries are implemented by ext2fs itself.
 */
struct vfsops ext2fs_vfsops = {
	MOUNT_EXT2FS,
	ext2fs_mount,
	ufs_start,
	ext2fs_unmount,
	ufs_root,
	ufs_quotactl,
	ext2fs_statfs,
	ext2fs_sync,
	ext2fs_vget,
	ext2fs_fhtovp,
	ext2fs_vptofh,
	ext2fs_init,
	ext2fs_reinit,
	ext2fs_done,
	ext2fs_sysctl,
	ext2fs_mountroot,
	ufs_check_export,
	ext2fs_vnodeopv_descs,
};
113 
/*
 * genfs callbacks attached to every ext2fs vnode by genfs_node_init()
 * in ext2fs_vget().  The initializer is positional; the member order is
 * defined by struct genfs_ops (declared outside this file).
 */
struct genfs_ops ext2fs_genfsops = {
	genfs_size,
	ext2fs_gop_alloc,
	genfs_gop_write,
};
119 
120 struct pool ext2fs_inode_pool;
121 
122 extern u_long ext2gennumber;
123 
124 void
125 ext2fs_init()
126 {
127 	ufs_init();
128 
129 	/*
130 	 * XXX Same structure as FFS inodes?  Should we share a common pool?
131 	 */
132 	pool_init(&ext2fs_inode_pool, sizeof(struct inode), 0, 0, 0,
133 	    "ext2fsinopl", &pool_allocator_nointr);
134 }
135 
/*
 * Re-initialisation hook: ext2fs has no state of its own to rebuild,
 * so this only forwards to the shared UFS layer.
 */
void
ext2fs_reinit()
{

	ufs_reinit();
}
141 
142 void
143 ext2fs_done()
144 {
145 	ufs_done();
146 	pool_destroy(&ext2fs_inode_pool);
147 }
148 
149 /*
150  * Called by main() when ext2fs is going to be mounted as root.
151  *
152  * Name is updated by mount(8) after booting.
153  */
154 #define ROOTNAME	"root_device"
155 
156 int
157 ext2fs_mountroot()
158 {
159 	extern struct vnode *rootvp;
160 	struct m_ext2fs *fs;
161 	struct mount *mp;
162 	struct proc *p = curproc;	/* XXX */
163 	struct ufsmount *ump;
164 	int error;
165 
166 	if (root_device->dv_class != DV_DISK)
167 		return (ENODEV);
168 
169 	/*
170 	 * Get vnodes for rootdev.
171 	 */
172 	if (bdevvp(rootdev, &rootvp))
173 		panic("ext2fs_mountroot: can't setup bdevvp's");
174 
175 	if ((error = vfs_rootmountalloc(MOUNT_EXT2FS, "root_device", &mp))) {
176 		vrele(rootvp);
177 		return (error);
178 	}
179 
180 	if ((error = ext2fs_mountfs(rootvp, mp, p)) != 0) {
181 		mp->mnt_op->vfs_refcount--;
182 		vfs_unbusy(mp);
183 		free(mp, M_MOUNT);
184 		vrele(rootvp);
185 		return (error);
186 	}
187 	simple_lock(&mountlist_slock);
188 	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
189 	simple_unlock(&mountlist_slock);
190 	ump = VFSTOUFS(mp);
191 	fs = ump->um_e2fs;
192 	memset(fs->e2fs_fsmnt, 0, sizeof(fs->e2fs_fsmnt));
193 	(void) copystr(mp->mnt_stat.f_mntonname, fs->e2fs_fsmnt,
194 	    sizeof(fs->e2fs_fsmnt) - 1, 0);
195 	if (fs->e2fs.e2fs_rev > E2FS_REV0) {
196 		memset(fs->e2fs.e2fs_fsmnt, 0, sizeof(fs->e2fs.e2fs_fsmnt));
197 		(void) copystr(mp->mnt_stat.f_mntonname, fs->e2fs.e2fs_fsmnt,
198 		    sizeof(fs->e2fs.e2fs_fsmnt) - 1, 0);
199 	}
200 	(void)ext2fs_statfs(mp, &mp->mnt_stat, p);
201 	vfs_unbusy(mp);
202 	inittodr(fs->e2fs.e2fs_wtime);
203 	return (0);
204 }
205 
206 /*
207  * VFS Operations.
208  *
209  * mount system call
210  */
211 int
212 ext2fs_mount(mp, path, data, ndp, p)
213 	struct mount *mp;
214 	const char *path;
215 	void * data;
216 	struct nameidata *ndp;
217 	struct proc *p;
218 {
219 	struct vnode *devvp;
220 	struct ufs_args args;
221 	struct ufsmount *ump = NULL;
222 	struct m_ext2fs *fs;
223 	size_t size;
224 	int error, flags;
225 	mode_t accessmode;
226 
227 	error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
228 	if (error)
229 		return (error);
230 	/*
231 	 * If updating, check whether changing from read-only to
232 	 * read/write; if there is no device name, that's all we do.
233 	 */
234 	if (mp->mnt_flag & MNT_UPDATE) {
235 		ump = VFSTOUFS(mp);
236 		fs = ump->um_e2fs;
237 		if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
238 			flags = WRITECLOSE;
239 			if (mp->mnt_flag & MNT_FORCE)
240 				flags |= FORCECLOSE;
241 			error = ext2fs_flushfiles(mp, flags, p);
242 			if (error == 0 &&
243 				ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
244 				(fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
245 				fs->e2fs.e2fs_state = E2FS_ISCLEAN;
246 				(void) ext2fs_sbupdate(ump, MNT_WAIT);
247 			}
248 			if (error)
249 				return (error);
250 			fs->e2fs_ronly = 1;
251 		}
252 		if (mp->mnt_flag & MNT_RELOAD) {
253 			error = ext2fs_reload(mp, ndp->ni_cnd.cn_cred, p);
254 			if (error)
255 				return (error);
256 		}
257 		if (fs->e2fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) {
258 			/*
259 			 * If upgrade to read-write by non-root, then verify
260 			 * that user has necessary permissions on the device.
261 			 */
262 			if (p->p_ucred->cr_uid != 0) {
263 				devvp = ump->um_devvp;
264 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
265 				error = VOP_ACCESS(devvp, VREAD | VWRITE,
266 						   p->p_ucred, p);
267 				VOP_UNLOCK(devvp, 0);
268 				if (error)
269 					return (error);
270 			}
271 			fs->e2fs_ronly = 0;
272 			if (fs->e2fs.e2fs_state == E2FS_ISCLEAN)
273 				fs->e2fs.e2fs_state = 0;
274 			else
275 				fs->e2fs.e2fs_state = E2FS_ERRORS;
276 			fs->e2fs_fmod = 1;
277 		}
278 		if (args.fspec == 0) {
279 			/*
280 			 * Process export requests.
281 			 */
282 			return (vfs_export(mp, &ump->um_export, &args.export));
283 		}
284 	}
285 	/*
286 	 * Not an update, or updating the name: look up the name
287 	 * and verify that it refers to a sensible block device.
288 	 */
289 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
290 	if ((error = namei(ndp)) != 0)
291 		return (error);
292 	devvp = ndp->ni_vp;
293 
294 	if (devvp->v_type != VBLK) {
295 		vrele(devvp);
296 		return (ENOTBLK);
297 	}
298 	if (major(devvp->v_rdev) >= nblkdev) {
299 		vrele(devvp);
300 		return (ENXIO);
301 	}
302 	/*
303 	 * If mount by non-root, then verify that user has necessary
304 	 * permissions on the device.
305 	 */
306 	if (p->p_ucred->cr_uid != 0) {
307 		accessmode = VREAD;
308 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
309 			accessmode |= VWRITE;
310 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
311 		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
312 		VOP_UNLOCK(devvp, 0);
313 		if (error) {
314 			vrele(devvp);
315 			return (error);
316 		}
317 	}
318 	if ((mp->mnt_flag & MNT_UPDATE) == 0)
319 		error = ext2fs_mountfs(devvp, mp, p);
320 	else {
321 		if (devvp != ump->um_devvp)
322 			error = EINVAL;	/* needs translation */
323 		else
324 			vrele(devvp);
325 	}
326 	if (error) {
327 		vrele(devvp);
328 		return (error);
329 	}
330 	ump = VFSTOUFS(mp);
331 	fs = ump->um_e2fs;
332 	(void) copyinstr(path, fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt) - 1,
333 	    &size);
334 	memset(fs->e2fs_fsmnt + size, 0, sizeof(fs->e2fs_fsmnt) - size);
335 	if (fs->e2fs.e2fs_rev > E2FS_REV0) {
336 		(void) copystr(mp->mnt_stat.f_mntonname, fs->e2fs.e2fs_fsmnt,
337 		    sizeof(fs->e2fs.e2fs_fsmnt) - 1, &size);
338 		memset(fs->e2fs.e2fs_fsmnt, 0,
339 		    sizeof(fs->e2fs.e2fs_fsmnt) - size);
340 	}
341 	memcpy(mp->mnt_stat.f_mntonname, fs->e2fs_fsmnt, MNAMELEN);
342 	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
343 		&size);
344 	memset(mp->mnt_stat.f_mntfromname + size, 0, MNAMELEN - size);
345 	if (fs->e2fs_fmod != 0) {	/* XXX */
346 		fs->e2fs_fmod = 0;
347 		if (fs->e2fs.e2fs_state == 0)
348 			fs->e2fs.e2fs_wtime = time.tv_sec;
349 		else
350 			printf("%s: file system not clean; please fsck(8)\n",
351 				mp->mnt_stat.f_mntfromname);
352 		(void) ext2fs_cgupdate(ump, MNT_WAIT);
353 	}
354 	return (0);
355 }
356 
357 /*
358  * Reload all incore data for a filesystem (used after running fsck on
359  * the root filesystem and finding things to fix). The filesystem must
360  * be mounted read-only.
361  *
362  * Things to do to update the mount:
363  *	1) invalidate all cached meta-data.
364  *	2) re-read superblock from disk.
365  *	3) re-read summary information from disk.
366  *	4) invalidate all inactive vnodes.
367  *	5) invalidate all cached file data.
368  *	6) re-read inode data for all active vnodes.
369  */
370 int
371 ext2fs_reload(mountp, cred, p)
372 	struct mount *mountp;
373 	struct ucred *cred;
374 	struct proc *p;
375 {
376 	struct vnode *vp, *nvp, *devvp;
377 	struct inode *ip;
378 	struct buf *bp;
379 	struct m_ext2fs *fs;
380 	struct ext2fs *newfs;
381 	struct partinfo dpart;
382 	int i, size, error;
383 	caddr_t cp;
384 
385 	if ((mountp->mnt_flag & MNT_RDONLY) == 0)
386 		return (EINVAL);
387 	/*
388 	 * Step 1: invalidate all cached meta-data.
389 	 */
390 	devvp = VFSTOUFS(mountp)->um_devvp;
391 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
392 	error = vinvalbuf(devvp, 0, cred, p, 0, 0);
393 	VOP_UNLOCK(devvp, 0);
394 	if (error)
395 		panic("ext2fs_reload: dirty1");
396 	/*
397 	 * Step 2: re-read superblock from disk.
398 	 */
399 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
400 		size = DEV_BSIZE;
401 	else
402 		size = dpart.disklab->d_secsize;
403 	error = bread(devvp, (ufs_daddr_t)(SBOFF / size), SBSIZE, NOCRED, &bp);
404 	if (error) {
405 		brelse(bp);
406 		return (error);
407 	}
408 	newfs = (struct ext2fs *)bp->b_data;
409 	error = ext2fs_checksb(newfs, (mountp->mnt_flag & MNT_RDONLY) != 0);
410 	if (error) {
411 		brelse(bp);
412 		return (error);
413 	}
414 
415 	fs = VFSTOUFS(mountp)->um_e2fs;
416 	/*
417 	 * copy in new superblock, and compute in-memory values
418 	 */
419 	e2fs_sbload(newfs, &fs->e2fs);
420 	fs->e2fs_ncg =
421 	    howmany(fs->e2fs.e2fs_bcount - fs->e2fs.e2fs_first_dblock,
422 	    fs->e2fs.e2fs_bpg);
423 	/* XXX assume hw bsize = 512 */
424 	fs->e2fs_fsbtodb = fs->e2fs.e2fs_log_bsize + 1;
425 	fs->e2fs_bsize = 1024 << fs->e2fs.e2fs_log_bsize;
426 	fs->e2fs_bshift = LOG_MINBSIZE + fs->e2fs.e2fs_log_bsize;
427 	fs->e2fs_qbmask = fs->e2fs_bsize - 1;
428 	fs->e2fs_bmask = ~fs->e2fs_qbmask;
429 	fs->e2fs_ngdb = howmany(fs->e2fs_ncg,
430 			fs->e2fs_bsize / sizeof(struct ext2_gd));
431 	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_DINODE_SIZE;
432 	fs->e2fs_itpg = fs->e2fs.e2fs_ipg/fs->e2fs_ipb;
433 
434 	/*
435 	 * Step 3: re-read summary information from disk.
436 	 */
437 
438 	for (i=0; i < fs->e2fs_ngdb; i++) {
439 		error = bread(devvp ,
440 		    fsbtodb(fs, ((fs->e2fs_bsize>1024)? 0 : 1) + i + 1),
441 		    fs->e2fs_bsize, NOCRED, &bp);
442 		if (error) {
443 			brelse(bp);
444 			return (error);
445 		}
446 		e2fs_cgload((struct ext2_gd*)bp->b_data,
447 		    &fs->e2fs_gd[i* fs->e2fs_bsize / sizeof(struct ext2_gd)],
448 		    fs->e2fs_bsize);
449 		brelse(bp);
450 	}
451 
452 loop:
453 	simple_lock(&mntvnode_slock);
454 	for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
455 		if (vp->v_mount != mountp) {
456 			simple_unlock(&mntvnode_slock);
457 			goto loop;
458 		}
459 		nvp = vp->v_mntvnodes.le_next;
460 		/*
461 		 * Step 4: invalidate all inactive vnodes.
462 		 */
463 		if (vrecycle(vp, &mntvnode_slock, p))
464 			goto loop;
465 		/*
466 		 * Step 5: invalidate all cached file data.
467 		 */
468 		simple_lock(&vp->v_interlock);
469 		simple_unlock(&mntvnode_slock);
470 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
471 			goto loop;
472 		if (vinvalbuf(vp, 0, cred, p, 0, 0))
473 			panic("ext2fs_reload: dirty2");
474 		/*
475 		 * Step 6: re-read inode data for all active vnodes.
476 		 */
477 		ip = VTOI(vp);
478 		error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
479 				  (int)fs->e2fs_bsize, NOCRED, &bp);
480 		if (error) {
481 			vput(vp);
482 			return (error);
483 		}
484 		cp = (caddr_t)bp->b_data +
485 		    (ino_to_fsbo(fs, ip->i_number) * EXT2_DINODE_SIZE);
486 		e2fs_iload((struct ext2fs_dinode *)cp, &ip->i_din.e2fs_din);
487 		brelse(bp);
488 		vput(vp);
489 		simple_lock(&mntvnode_slock);
490 	}
491 	simple_unlock(&mntvnode_slock);
492 	return (0);
493 }
494 
/*
 * Common code for mount and mountroot
 *
 * Opens the block device devvp, reads and validates the ext2 superblock,
 * builds the in-core ufsmount / m_ext2fs structures (including the group
 * descriptor table) and attaches them to mp.
 *
 * devvp  vnode of the block device holding the file system
 * mp	  mount structure to fill in; MNT_RDONLY in mnt_flag selects a
 *	  read-only mount
 * p	  calling process, may be NULL (NOCRED is then used)
 *
 * Returns 0 on success.  On failure the device is closed again, every
 * allocation made here is freed and an errno value is returned.
 */
int
ext2fs_mountfs(devvp, mp, p)
	struct vnode *devvp;
	struct mount *mp;
	struct proc *p;
{
	struct ufsmount *ump;
	struct buf *bp;
	struct ext2fs *fs;
	struct m_ext2fs *m_fs;
	dev_t dev;
	struct partinfo dpart;
	int error, i, size, ronly;
	struct ucred *cred;
	extern struct vnode *rootvp;

	dev = devvp->v_rdev;
	cred = p ? p->p_ucred : NOCRED;
	/*
	 * Disallow multiple mounts of the same device.
	 * Disallow mounting of a device that is currently in use
	 * (except for root, which might share swap device for miniroot).
	 * Flush out any old buffers remaining from a previous use.
	 */
	if ((error = vfs_mountedon(devvp)) != 0)
		return (error);
	if (vcount(devvp) > 1 && devvp != rootvp)
		return (EBUSY);
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
	VOP_UNLOCK(devvp, 0);
	if (error)
		return (error);

	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
	if (error)
		return (error);
	/* Sector size: ask the disklabel, fall back to DEV_BSIZE. */
	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
		size = DEV_BSIZE;
	else
		size = dpart.disklab->d_secsize;

	/* From here on failures go through "out", which checks these. */
	bp = NULL;
	ump = NULL;

#ifdef DEBUG_EXT2
	printf("sb size: %d ino size %d\n", sizeof(struct ext2fs),
	    EXT2_DINODE_SIZE);
#endif
	/* Read the on-disk superblock and make sure we can handle it. */
	error = bread(devvp, (SBOFF / size), SBSIZE, cred, &bp);
	if (error)
		goto out;
	fs = (struct ext2fs *)bp->b_data;
	error = ext2fs_checksb(fs, ronly);
	if (error)
		goto out;
	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
	memset((caddr_t)ump, 0, sizeof *ump);
	ump->um_e2fs = malloc(sizeof(struct m_ext2fs), M_UFSMNT, M_WAITOK);
	memset((caddr_t)ump->um_e2fs, 0, sizeof(struct m_ext2fs));
	/* Keep a converted copy of the superblock; the buffer can go. */
	e2fs_sbload((struct ext2fs*)bp->b_data, &ump->um_e2fs->e2fs);
	brelse(bp);
	bp = NULL;
	m_fs = ump->um_e2fs;
	m_fs->e2fs_ronly = ronly;
	if (ronly == 0) {
		/*
		 * Mounted read-write: the state is no longer "clean".
		 * A file system that was not clean is flagged as having
		 * errors.  e2fs_fmod marks the superblock as modified.
		 */
		if (m_fs->e2fs.e2fs_state == E2FS_ISCLEAN)
			m_fs->e2fs.e2fs_state = 0;
		else
			m_fs->e2fs.e2fs_state = E2FS_ERRORS;
		m_fs->e2fs_fmod = 1;
	}

	/* compute dynamic sb infos */
	m_fs->e2fs_ncg =
		howmany(m_fs->e2fs.e2fs_bcount - m_fs->e2fs.e2fs_first_dblock,
		m_fs->e2fs.e2fs_bpg);
	/* XXX assume hw bsize = 512 */
	m_fs->e2fs_fsbtodb = m_fs->e2fs.e2fs_log_bsize + 1;
	m_fs->e2fs_bsize = 1024 << m_fs->e2fs.e2fs_log_bsize;
	m_fs->e2fs_bshift = LOG_MINBSIZE + m_fs->e2fs.e2fs_log_bsize;
	m_fs->e2fs_qbmask = m_fs->e2fs_bsize - 1;
	m_fs->e2fs_bmask = ~m_fs->e2fs_qbmask;
	m_fs->e2fs_ngdb = howmany(m_fs->e2fs_ncg,
		m_fs->e2fs_bsize / sizeof(struct ext2_gd));
	m_fs->e2fs_ipb = m_fs->e2fs_bsize / EXT2_DINODE_SIZE;
	m_fs->e2fs_itpg = m_fs->e2fs.e2fs_ipg/m_fs->e2fs_ipb;

	/*
	 * Load the group descriptor table: e2fs_ngdb whole blocks.  The
	 * first one is block 1 when the block size is larger than 1024
	 * bytes and block 2 otherwise.
	 */
	m_fs->e2fs_gd = malloc(m_fs->e2fs_ngdb * m_fs->e2fs_bsize,
		M_UFSMNT, M_WAITOK);
	for (i=0; i < m_fs->e2fs_ngdb; i++) {
		error = bread(devvp ,
		    fsbtodb(m_fs, ((m_fs->e2fs_bsize>1024)? 0 : 1) + i + 1),
		    m_fs->e2fs_bsize, NOCRED, &bp);
		if (error) {
			/* "out" frees ump and um_e2fs but not the table. */
			free(m_fs->e2fs_gd, M_UFSMNT);
			goto out;
		}
		e2fs_cgload((struct ext2_gd*)bp->b_data,
		    &m_fs->e2fs_gd[
			i * m_fs->e2fs_bsize / sizeof(struct ext2_gd)],
		    m_fs->e2fs_bsize);
		brelse(bp);
		bp = NULL;
	}

	/* Everything was read successfully: wire up the mount. */
	mp->mnt_data = (qaddr_t)ump;
	mp->mnt_stat.f_fsid.val[0] = (long)dev;
	mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_EXT2FS);
	mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_dev_bshift = DEV_BSHIFT;	/* XXX */
	mp->mnt_fs_bshift = m_fs->e2fs_bshift;
	ump->um_flags = 0;
	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_nindir = NINDIR(m_fs);
	ump->um_lognindir = ffs(NINDIR(m_fs)) - 1;
	ump->um_bptrtodb = m_fs->e2fs_fsbtodb;
	ump->um_seqinc = 1; /* no frags */
	devvp->v_specmountpoint = mp;
	return (0);

out:
	/* Failure: release the buffer, close the device, free our state. */
	if (bp)
		brelse(bp);
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
	VOP_UNLOCK(devvp, 0);
	if (ump) {
		free(ump->um_e2fs, M_UFSMNT);
		free(ump, M_UFSMNT);
		mp->mnt_data = (qaddr_t)0;
	}
	return (error);
}
636 
637 /*
638  * unmount system call
639  */
640 int
641 ext2fs_unmount(mp, mntflags, p)
642 	struct mount *mp;
643 	int mntflags;
644 	struct proc *p;
645 {
646 	struct ufsmount *ump;
647 	struct m_ext2fs *fs;
648 	int error, flags;
649 
650 	flags = 0;
651 	if (mntflags & MNT_FORCE)
652 		flags |= FORCECLOSE;
653 	if ((error = ext2fs_flushfiles(mp, flags, p)) != 0)
654 		return (error);
655 	ump = VFSTOUFS(mp);
656 	fs = ump->um_e2fs;
657 	if (fs->e2fs_ronly == 0 &&
658 		ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
659 		(fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
660 		fs->e2fs.e2fs_state = E2FS_ISCLEAN;
661 		(void) ext2fs_sbupdate(ump, MNT_WAIT);
662 	}
663 	if (ump->um_devvp->v_type != VBAD)
664 		ump->um_devvp->v_specmountpoint = NULL;
665 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
666 	error = VOP_CLOSE(ump->um_devvp, fs->e2fs_ronly ? FREAD : FREAD|FWRITE,
667 		NOCRED, p);
668 	vput(ump->um_devvp);
669 	free(fs->e2fs_gd, M_UFSMNT);
670 	free(fs, M_UFSMNT);
671 	free(ump, M_UFSMNT);
672 	mp->mnt_data = (qaddr_t)0;
673 	mp->mnt_flag &= ~MNT_LOCAL;
674 	return (error);
675 }
676 
677 /*
678  * Flush out all the files in a filesystem.
679  */
680 int
681 ext2fs_flushfiles(mp, flags, p)
682 	struct mount *mp;
683 	int flags;
684 	struct proc *p;
685 {
686 	extern int doforce;
687 	int error;
688 
689 	if (!doforce)
690 		flags &= ~FORCECLOSE;
691 	error = vflush(mp, NULLVP, flags);
692 	return (error);
693 }
694 
695 /*
696  * Get file system statistics.
697  */
698 int
699 ext2fs_statfs(mp, sbp, p)
700 	struct mount *mp;
701 	struct statfs *sbp;
702 	struct proc *p;
703 {
704 	struct ufsmount *ump;
705 	struct m_ext2fs *fs;
706 	u_int32_t overhead, overhead_per_group;
707 	int i, ngroups;
708 
709 	ump = VFSTOUFS(mp);
710 	fs = ump->um_e2fs;
711 	if (fs->e2fs.e2fs_magic != E2FS_MAGIC)
712 		panic("ext2fs_statfs");
713 
714 #ifdef COMPAT_09
715 	sbp->f_type = 1;
716 #else
717 	sbp->f_type = 0;
718 #endif
719 
720 	/*
721 	 * Compute the overhead (FS structures)
722 	 */
723 	overhead_per_group = 1 /* block bitmap */ +
724 				 1 /* inode bitmap */ +
725 				 fs->e2fs_itpg;
726 	overhead = fs->e2fs.e2fs_first_dblock +
727 		   fs->e2fs_ncg * overhead_per_group;
728 	if (fs->e2fs.e2fs_rev > E2FS_REV0 &&
729 	    fs->e2fs.e2fs_features_rocompat & EXT2F_ROCOMPAT_SPARSESUPER) {
730 		for (i = 0, ngroups = 0; i < fs->e2fs_ncg; i++) {
731 			if (cg_has_sb(i))
732 				ngroups++;
733 		}
734 	} else {
735 		ngroups = fs->e2fs_ncg;
736 	}
737 	overhead += ngroups * (1 + fs->e2fs_ngdb);
738 
739 	sbp->f_bsize = fs->e2fs_bsize;
740 	sbp->f_iosize = fs->e2fs_bsize;
741 	sbp->f_blocks = fs->e2fs.e2fs_bcount - overhead;
742 	sbp->f_bfree = fs->e2fs.e2fs_fbcount;
743 	sbp->f_bavail = sbp->f_bfree - fs->e2fs.e2fs_rbcount;
744 	sbp->f_files =  fs->e2fs.e2fs_icount;
745 	sbp->f_ffree = fs->e2fs.e2fs_ficount;
746 	if (sbp != &mp->mnt_stat) {
747 		memcpy(sbp->f_mntonname, mp->mnt_stat.f_mntonname, MNAMELEN);
748 		memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, MNAMELEN);
749 	}
750 	strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN);
751 	return (0);
752 }
753 
/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 *
 * mp	    mount to synchronise
 * waitfor  MNT_WAIT to wait for the writes, MNT_LAZY to skip the
 *	    per-vnode and device flushes; any other value starts the
 *	    writes without waiting
 * cred, p  passed through to VOP_FSYNC()
 *
 * Returns 0, or the last error reported by one of the flush steps.
 */
int
ext2fs_sync(mp, waitfor, cred, p)
	struct mount *mp;
	int waitfor;
	struct ucred *cred;
	struct proc *p;
{
	struct vnode *vp, *nvp;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct m_ext2fs *fs;
	int error, allerror = 0;

	fs = ump->um_e2fs;
	/* A read-only file system must never have a modified superblock. */
	if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) {	/* XXX */
		printf("fs = %s\n", fs->e2fs_fsmnt);
		panic("update: rofs mod");
	}
	/*
	 * Write back each (modified) inode.
	 */
	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;
		simple_lock(&vp->v_interlock);
		nvp = LIST_NEXT(vp, v_mntvnodes);
		ip = VTOI(vp);
		/*
		 * Skip the vnode on a lazy sync, when it has no type, or
		 * when it has nothing to write: no pending inode flags,
		 * no dirty buffers and no pages.
		 */
		if (waitfor == MNT_LAZY || vp->v_type == VNON ||
		    ((ip->i_flag &
		      (IN_ACCESS | IN_CHANGE | IN_UPDATE | IN_MODIFIED | IN_ACCESSED)) == 0 &&
		     LIST_EMPTY(&vp->v_dirtyblkhd) &&
		     vp->v_uobj.uo_npages == 0))
		{
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/* Drop the list lock; vget() consumes the interlock. */
		simple_unlock(&mntvnode_slock);
		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK);
		if (error) {
			simple_lock(&mntvnode_slock);
			/* ENOENT: rescan the list from the start. */
			if (error == ENOENT)
				goto loop;
			/* Any other failure: just skip this vnode. */
			continue;
		}
		if ((error = VOP_FSYNC(vp, cred,
		    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0, p)) != 0)
			allerror = error;
		vput(vp);
		simple_lock(&mntvnode_slock);
	}
	simple_unlock(&mntvnode_slock);
	/*
	 * Force stale file system control information to be flushed.
	 */
	if (waitfor != MNT_LAZY) {
		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_FSYNC(ump->um_devvp, cred,
		    waitfor == MNT_WAIT ? FSYNC_WAIT : 0, 0, 0, p)) != 0)
			allerror = error;
		VOP_UNLOCK(ump->um_devvp, 0);
	}
	/*
	 * Write back modified superblock.
	 */
	if (fs->e2fs_fmod != 0) {
		fs->e2fs_fmod = 0;
		fs->e2fs.e2fs_wtime = time.tv_sec;
		if ((error = ext2fs_cgupdate(ump, waitfor)))
			allerror = error;
	}
	return (allerror);
}
839 
/*
 * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 *
 * mp	mount the inode belongs to
 * ino	inode number to look up
 * vpp	receives the vnode on success and is set to NULL on failure
 *
 * Returns 0 on success or an errno value.
 */
int
ext2fs_vget(mp, ino, vpp)
	struct mount *mp;
	ino_t ino;
	struct vnode **vpp;
{
	struct m_ext2fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;
	caddr_t cp;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;

	/* Fast path: the inode is already in the hash table. */
	if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
		return (0);

	/* Allocate a new vnode/inode. */
	if ((error = getnewvnode(VT_EXT2FS, mp, ext2fs_vnodeop_p, &vp)) != 0) {
		*vpp = NULL;
		return (error);
	}

	/*
	 * Look the inode up again before taking the hash lock: if it
	 * has appeared in the table in the meantime, give the new vnode
	 * back and return the existing one.  The lookup is repeated
	 * whenever lockmgr() returns non-zero.
	 */
	do {
		if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) {
			ungetnewvnode(vp);
			return (0);
		}
	} while (lockmgr(&ufs_hashlock, LK_EXCLUSIVE|LK_SLEEPFAIL, 0));

	ip = pool_get(&ext2fs_inode_pool, PR_WAITOK);
	memset(ip, 0, sizeof(struct inode));
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_e2fs = fs = ump->um_e2fs;
	ip->i_dev = dev;
	ip->i_number = ino;
	ip->i_e2fs_last_lblk = 0;
	ip->i_e2fs_last_blk = 0;

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */

	ufs_ihashins(ip);
	lockmgr(&ufs_hashlock, LK_RELEASE, 0);

	/* Read in the disk contents for the inode, copy into the inode. */
	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
			  (int)fs->e2fs_bsize, NOCRED, &bp);
	if (error) {

		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */

		vput(vp);
		brelse(bp);
		*vpp = NULL;
		return (error);
	}
	/* Locate this inode inside the block just read and convert it. */
	cp = (caddr_t)bp->b_data +
	    (ino_to_fsbo(fs, ino) * EXT2_DINODE_SIZE);
	e2fs_iload((struct ext2fs_dinode *)cp, &ip->i_din.e2fs_din);
	brelse(bp);

	/* If the inode was deleted, reset all fields */
	if (ip->i_e2fs_dtime != 0) {
		ip->i_e2fs_mode = ip->i_e2fs_size = ip->i_e2fs_nblock = 0;
		memset(ip->i_e2fs_blocks, 0, sizeof(ip->i_e2fs_blocks));
	}

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */

	error = ext2fs_vinit(mp, ext2fs_specop_p, ext2fs_fifoop_p, &vp);
	if (error) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */

	genfs_node_init(vp, &ext2fs_genfsops);
	ip->i_devvp = ump->um_devvp;
	VREF(ip->i_devvp);

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */

	if (ip->i_e2fs_gen == 0) {
		/* Never hand out a number below the current time. */
		if (++ext2gennumber < (u_long)time.tv_sec)
			ext2gennumber = time.tv_sec;
		ip->i_e2fs_gen = ext2gennumber;
		/* Mark the inode modified only if the mount is writable. */
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	vp->v_size = ip->i_e2fs_size;
	*vpp = vp;
	return (0);
}
963 
964 /*
965  * File handle to vnode
966  *
967  * Have to be really careful about stale file handles:
968  * - check that the inode number is valid
969  * - call ext2fs_vget() to get the locked inode
970  * - check for an unallocated inode (i_mode == 0)
971  */
972 int
973 ext2fs_fhtovp(mp, fhp, vpp)
974 	struct mount *mp;
975 	struct fid *fhp;
976 	struct vnode **vpp;
977 {
978 	struct inode *ip;
979 	struct vnode *nvp;
980 	int error;
981 	struct ufid *ufhp;
982 	struct m_ext2fs *fs;
983 
984 	ufhp = (struct ufid *)fhp;
985 	fs = VFSTOUFS(mp)->um_e2fs;
986 	if ((ufhp->ufid_ino < EXT2_FIRSTINO && ufhp->ufid_ino != EXT2_ROOTINO) ||
987 		ufhp->ufid_ino >= fs->e2fs_ncg * fs->e2fs.e2fs_ipg)
988 		return (ESTALE);
989 
990 	if ((error = VFS_VGET(mp, ufhp->ufid_ino, &nvp)) != 0) {
991 		*vpp = NULLVP;
992 		return (error);
993 	}
994 	ip = VTOI(nvp);
995 	if (ip->i_e2fs_mode == 0 || ip->i_e2fs_dtime != 0 ||
996 		ip->i_e2fs_gen != ufhp->ufid_gen) {
997 		vput(nvp);
998 		*vpp = NULLVP;
999 		return (ESTALE);
1000 	}
1001 	*vpp = nvp;
1002 	return (0);
1003 }
1004 
1005 /*
1006  * Vnode pointer to File handle
1007  */
1008 /* ARGSUSED */
1009 int
1010 ext2fs_vptofh(vp, fhp)
1011 	struct vnode *vp;
1012 	struct fid *fhp;
1013 {
1014 	struct inode *ip;
1015 	struct ufid *ufhp;
1016 
1017 	ip = VTOI(vp);
1018 	ufhp = (struct ufid *)fhp;
1019 	ufhp->ufid_len = sizeof(struct ufid);
1020 	ufhp->ufid_ino = ip->i_number;
1021 	ufhp->ufid_gen = ip->i_e2fs_gen;
1022 	return (0);
1023 }
1024 
1025 int
1026 ext2fs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
1027 	int *name;
1028 	u_int namelen;
1029 	void *oldp;
1030 	size_t *oldlenp;
1031 	void *newp;
1032 	size_t newlen;
1033 	struct proc *p;
1034 {
1035 	return (EOPNOTSUPP);
1036 }
1037 
1038 /*
1039  * Write a superblock and associated information back to disk.
1040  */
1041 int
1042 ext2fs_sbupdate(mp, waitfor)
1043 	struct ufsmount *mp;
1044 	int waitfor;
1045 {
1046 	struct m_ext2fs *fs = mp->um_e2fs;
1047 	struct buf *bp;
1048 	int error = 0;
1049 
1050 	bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0);
1051 	e2fs_sbsave(&fs->e2fs, (struct ext2fs*)bp->b_data);
1052 	if (waitfor == MNT_WAIT)
1053 		error = bwrite(bp);
1054 	else
1055 		bawrite(bp);
1056 	return (error);
1057 }
1058 
1059 int
1060 ext2fs_cgupdate(mp, waitfor)
1061 	struct ufsmount *mp;
1062 	int waitfor;
1063 {
1064 	struct m_ext2fs *fs = mp->um_e2fs;
1065 	struct buf *bp;
1066 	int i, error = 0, allerror = 0;
1067 
1068 	allerror = ext2fs_sbupdate(mp, waitfor);
1069 	for (i = 0; i < fs->e2fs_ngdb; i++) {
1070 		bp = getblk(mp->um_devvp, fsbtodb(fs, ((fs->e2fs_bsize>1024)?0:1)+i+1),
1071 			fs->e2fs_bsize, 0, 0);
1072 		e2fs_cgsave(&fs->e2fs_gd[i* fs->e2fs_bsize / sizeof(struct ext2_gd)],
1073 				(struct ext2_gd*)bp->b_data, fs->e2fs_bsize);
1074 		if (waitfor == MNT_WAIT)
1075 			error = bwrite(bp);
1076 		else
1077 			bawrite(bp);
1078 	}
1079 
1080 	if (!allerror && error)
1081 		allerror = error;
1082 	return (allerror);
1083 }
1084 
1085 static int
1086 ext2fs_checksb(fs, ronly)
1087 	struct ext2fs *fs;
1088 	int ronly;
1089 {
1090 	if (fs2h16(fs->e2fs_magic) != E2FS_MAGIC) {
1091 		return (EIO);		/* XXX needs translation */
1092 	}
1093 	if (fs2h32(fs->e2fs_rev) > E2FS_REV1) {
1094 #ifdef DIAGNOSTIC
1095 		printf("Ext2 fs: unsupported revision number: %x\n",
1096 					fs2h32(fs->e2fs_rev));
1097 #endif
1098 		return (EIO);		/* XXX needs translation */
1099 	}
1100 	if (fs2h32(fs->e2fs_log_bsize) > 2) { /* block size = 1024|2048|4096 */
1101 #ifdef DIAGNOSTIC
1102 		printf("Ext2 fs: bad block size: %d (expected <=2 for ext2 fs)\n",
1103 			fs2h32(fs->e2fs_log_bsize));
1104 #endif
1105 		return (EIO);	   /* XXX needs translation */
1106 	}
1107 	if (fs2h32(fs->e2fs_rev) > E2FS_REV0) {
1108 		if (fs2h32(fs->e2fs_first_ino) != EXT2_FIRSTINO ||
1109 		    fs2h16(fs->e2fs_inode_size) != EXT2_DINODE_SIZE) {
1110 			printf("Ext2 fs: unsupported inode size\n");
1111 			return (EINVAL);      /* XXX needs translation */
1112 		}
1113 		if (fs2h32(fs->e2fs_features_incompat) &
1114 		    ~EXT2F_INCOMPAT_SUPP) {
1115 			printf("Ext2 fs: unsupported optionnal feature\n");
1116 			return (EINVAL);      /* XXX needs translation */
1117 		}
1118 		if (!ronly && fs2h32(fs->e2fs_features_rocompat) &
1119 		    ~EXT2F_ROCOMPAT_SUPP) {
1120 			return (EROFS);      /* XXX needs translation */
1121 		}
1122 	}
1123 	return (0);
1124 }
1125