xref: /minix/sys/ufs/ffs/ffs_vfsops.c (revision 84d9c625)
1 /*	$NetBSD: ffs_vfsops.c,v 1.291 2013/11/23 13:35:37 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Wasabi Systems, Inc, and by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1989, 1991, 1993, 1994
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
61  */
62 
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.291 2013/11/23 13:35:37 christos Exp $");
65 
66 #if defined(_KERNEL_OPT)
67 #include "opt_ffs.h"
68 #include "opt_quota.h"
69 #include "opt_wapbl.h"
70 #endif
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/namei.h>
75 #include <sys/proc.h>
76 #include <sys/kernel.h>
77 #include <sys/vnode.h>
78 #include <sys/socket.h>
79 #include <sys/mount.h>
80 #include <sys/buf.h>
81 #include <sys/device.h>
82 #include <sys/disk.h>
83 #include <sys/mbuf.h>
84 #include <sys/file.h>
85 #include <sys/disklabel.h>
86 #include <sys/ioctl.h>
87 #include <sys/errno.h>
88 #include <sys/kmem.h>
89 #include <sys/pool.h>
90 #include <sys/lock.h>
91 #include <sys/sysctl.h>
92 #include <sys/conf.h>
93 #include <sys/kauth.h>
94 #include <sys/wapbl.h>
95 #include <sys/fstrans.h>
96 #include <sys/module.h>
97 
98 #include <miscfs/genfs/genfs.h>
99 #include <miscfs/specfs/specdev.h>
100 
101 #include <ufs/ufs/quota.h>
102 #include <ufs/ufs/ufsmount.h>
103 #include <ufs/ufs/inode.h>
104 #include <ufs/ufs/dir.h>
105 #include <ufs/ufs/ufs_extern.h>
106 #include <ufs/ufs/ufs_bswap.h>
107 #include <ufs/ufs/ufs_wapbl.h>
108 
109 #include <ufs/ffs/fs.h>
110 #include <ufs/ffs/ffs_extern.h>
111 
/*
 * Module glue and file-scope state shared by the FFS VFS entry points.
 */
112 MODULE(MODULE_CLASS_VFS, ffs, NULL);	/* declares module "ffs" in class VFS; NOTE(review): presumably dispatches to ffs_modcmd() -- confirm against sys/module.h */
113 
114 static int	ffs_vfs_fsync(vnode_t *, int);	/* forward declaration; referenced from the ffs_vfsops table */
115 
116 static struct sysctllog *ffs_sysctl_log;	/* sysctl log filled on MODULE_CMD_INIT, torn down on MODULE_CMD_FINI */
117 
118 static kauth_listener_t ffs_snapshot_listener;	/* result of kauth_listen_scope(); NULL when registration failed */
119 
120 /* how many times ffs_init() was called */
121 int ffs_initcount = 0;
122 
123 extern const struct vnodeopv_desc ffs_vnodeop_opv_desc;	/* the three descriptors are defined outside this file */
124 extern const struct vnodeopv_desc ffs_specop_opv_desc;
125 extern const struct vnodeopv_desc ffs_fifoop_opv_desc;
126 
127 const struct vnodeopv_desc * const ffs_vnodeopv_descs[] = {	/* NULL-terminated; handed to the VFS layer through ffs_vfsops */
128 	&ffs_vnodeop_opv_desc,
129 	&ffs_specop_opv_desc,
130 	&ffs_fifoop_opv_desc,
131 	NULL,
132 };
133 
/*
 * VFS operations vector for FFS.  It is attached with vfs_attach() and
 * detached with vfs_detach() in ffs_modcmd().
 *
 * The initializer is positional: entries must stay in the member order of
 * struct vfsops, which is declared outside this file.
 */
134 struct vfsops ffs_vfsops = {
135 	MOUNT_FFS,			/* file system type name */
136 	sizeof (struct ufs_args),	/* size of the mount argument structure; ffs_mount() rejects shorter data */
137 	ffs_mount,			/* mount, mount update and MNT_GETARGS */
138 	ufs_start,
139 	ffs_unmount,
140 	ufs_root,
141 	ufs_quotactl,
142 	ffs_statvfs,			/* also called directly by ffs_mountroot() and ffs_reload() */
143 	ffs_sync,
144 	ffs_vget,
145 	ffs_fhtovp,
146 	ffs_vptofh,
147 	ffs_init,
148 	ffs_reinit,
149 	ffs_done,
150 	ffs_mountroot,			/* mount FFS as the root file system */
151 	ffs_snapshot,
152 	ffs_extattrctl,
153 	ffs_suspendctl,
154 	genfs_renamelock_enter,		/* rename lock handlers come from genfs, not from FFS */
155 	genfs_renamelock_exit,
156 	ffs_vfs_fsync,			/* static helper declared earlier in this file */
157 	ffs_vnodeopv_descs,		/* NULL-terminated vnode operation descriptor list */
158 	0,				/* NOTE(review): presumably the initial reference count -- confirm against struct vfsops */
159 	{ NULL, NULL },			/* NOTE(review): presumably list linkage filled in on attach -- confirm */
160 };
161 
162 static const struct genfs_ops ffs_genfsops = {
163 	.gop_size = ffs_gop_size,
164 	.gop_alloc = ufs_gop_alloc,
165 	.gop_write = genfs_gop_write,
166 	.gop_markupdate = ufs_gop_markupdate,
167 };
168 
169 static const struct ufs_ops ffs_ufsops = {
170 	.uo_itimes = ffs_itimes,
171 	.uo_update = ffs_update,
172 	.uo_truncate = ffs_truncate,
173 	.uo_valloc = ffs_valloc,
174 	.uo_vfree = ffs_vfree,
175 	.uo_balloc = ffs_balloc,
176 	.uo_snapgone = ffs_snapgone,
177 };
178 
179 static int
180 ffs_snapshot_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
181     void *arg0, void *arg1, void *arg2, void *arg3)
182 {
183 	vnode_t *vp = arg2;
184 	int result = KAUTH_RESULT_DEFER;;
185 
186 	if (action != KAUTH_SYSTEM_FS_SNAPSHOT)
187 		return result;
188 
189 	if (VTOI(vp)->i_uid == kauth_cred_geteuid(cred))
190 		result = KAUTH_RESULT_ALLOW;
191 
192 	return result;
193 }
194 
195 static int
196 ffs_modcmd(modcmd_t cmd, void *arg)
197 {
198 	int error;
199 
200 #if 0
201 	extern int doasyncfree;
202 #endif
203 #ifdef UFS_EXTATTR
204 	extern int ufs_extattr_autocreate;
205 #endif
206 	extern int ffs_log_changeopt;
207 
208 	switch (cmd) {
209 	case MODULE_CMD_INIT:
210 		error = vfs_attach(&ffs_vfsops);
211 		if (error != 0)
212 			break;
213 
214 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
215 			       CTLFLAG_PERMANENT,
216 			       CTLTYPE_NODE, "vfs", NULL,
217 			       NULL, 0, NULL, 0,
218 			       CTL_VFS, CTL_EOL);
219 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
220 			       CTLFLAG_PERMANENT,
221 			       CTLTYPE_NODE, "ffs",
222 			       SYSCTL_DESCR("Berkeley Fast File System"),
223 			       NULL, 0, NULL, 0,
224 			       CTL_VFS, 1, CTL_EOL);
225 		/*
226 		 * @@@ should we even bother with these first three?
227 		 */
228 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
229 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
230 			       CTLTYPE_INT, "doclusterread", NULL,
231 			       sysctl_notavail, 0, NULL, 0,
232 			       CTL_VFS, 1, FFS_CLUSTERREAD, CTL_EOL);
233 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
234 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
235 			       CTLTYPE_INT, "doclusterwrite", NULL,
236 			       sysctl_notavail, 0, NULL, 0,
237 			       CTL_VFS, 1, FFS_CLUSTERWRITE, CTL_EOL);
238 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
239 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
240 			       CTLTYPE_INT, "doreallocblks", NULL,
241 			       sysctl_notavail, 0, NULL, 0,
242 			       CTL_VFS, 1, FFS_REALLOCBLKS, CTL_EOL);
243 #if 0
244 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
245 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
246 			       CTLTYPE_INT, "doasyncfree",
247 			       SYSCTL_DESCR("Release dirty blocks asynchronously"),
248 			       NULL, 0, &doasyncfree, 0,
249 			       CTL_VFS, 1, FFS_ASYNCFREE, CTL_EOL);
250 #endif
251 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
252 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
253 			       CTLTYPE_INT, "log_changeopt",
254 			       SYSCTL_DESCR("Log changes in optimization strategy"),
255 			       NULL, 0, &ffs_log_changeopt, 0,
256 			       CTL_VFS, 1, FFS_LOG_CHANGEOPT, CTL_EOL);
257 #ifdef UFS_EXTATTR
258 		sysctl_createv(&ffs_sysctl_log, 0, NULL, NULL,
259 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
260 			       CTLTYPE_INT, "extattr_autocreate",
261 			       SYSCTL_DESCR("Size of attribute for "
262 					    "backing file autocreation"),
263 			       NULL, 0, &ufs_extattr_autocreate, 0,
264 			       CTL_VFS, 1, FFS_EXTATTR_AUTOCREATE, CTL_EOL);
265 
266 #endif /* UFS_EXTATTR */
267 
268 		ffs_snapshot_listener = kauth_listen_scope(KAUTH_SCOPE_SYSTEM,
269 		    ffs_snapshot_cb, NULL);
270 		if (ffs_snapshot_listener == NULL)
271 			printf("ffs_modcmd: can't listen on system scope.\n");
272 
273 		break;
274 	case MODULE_CMD_FINI:
275 		error = vfs_detach(&ffs_vfsops);
276 		if (error != 0)
277 			break;
278 		sysctl_teardown(&ffs_sysctl_log);
279 		if (ffs_snapshot_listener != NULL)
280 			kauth_unlisten_scope(ffs_snapshot_listener);
281 		break;
282 	default:
283 		error = ENOTTY;
284 		break;
285 	}
286 
287 	return (error);
288 }
289 
/*
 * Non-static pool caches.  They are only defined here; nothing in the
 * visible part of this file initializes them (NOTE(review): presumably
 * done in ffs_init() -- confirm).
 */
290 pool_cache_t ffs_inode_cache;
291 pool_cache_t ffs_dinode1_cache;
292 pool_cache_t ffs_dinode2_cache;
293 
294 static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, daddr_t);	/* called by ffs_reload() and ffs_mountfs() once a superblock has been read */
295 static void ffs_oldfscompat_write(struct fs *, struct ufsmount *);	/* forward declaration; defined later in the file */
296 
297 /*
298  * Called by main() when ffs is going to be mounted as root.
299  */
300 
301 int
302 ffs_mountroot(void)
303 {
304 	struct fs *fs;
305 	struct mount *mp;
306 	struct lwp *l = curlwp;			/* XXX */
307 	struct ufsmount *ump;
308 	int error;
309 
310 	if (device_class(root_device) != DV_DISK)
311 		return (ENODEV);
312 
313 	if ((error = vfs_rootmountalloc(MOUNT_FFS, "root_device", &mp))) {
314 		vrele(rootvp);
315 		return (error);
316 	}
317 
318 	/*
319 	 * We always need to be able to mount the root file system.
320 	 */
321 	mp->mnt_flag |= MNT_FORCE;
322 	if ((error = ffs_mountfs(rootvp, mp, l)) != 0) {
323 		vfs_unbusy(mp, false, NULL);
324 		vfs_destroy(mp);
325 		return (error);
326 	}
327 	mp->mnt_flag &= ~MNT_FORCE;
328 	mountlist_append(mp);
329 	ump = VFSTOUFS(mp);
330 	fs = ump->um_fs;
331 	memset(fs->fs_fsmnt, 0, sizeof(fs->fs_fsmnt));
332 	(void)copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
333 	(void)ffs_statvfs(mp, &mp->mnt_stat);
334 	vfs_unbusy(mp, false, NULL);
335 	setrootfstime((time_t)fs->fs_time);
336 	return (0);
337 }
338 
339 /*
340  * VFS Operations.
341  *
342  * mount system call
343  */
344 int
345 ffs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
346 {
347 	struct lwp *l = curlwp;
348 	struct vnode *devvp = NULL;
349 	struct ufs_args *args = data;
350 	struct ufsmount *ump = NULL;
351 	struct fs *fs;
352 	int error = 0, flags, update;
353 	mode_t accessmode;
354 
355 	if (*data_len < sizeof *args)
356 		return EINVAL;
357 
358 	if (mp->mnt_flag & MNT_GETARGS) {
359 		ump = VFSTOUFS(mp);
360 		if (ump == NULL)
361 			return EIO;
362 		args->fspec = NULL;
363 		*data_len = sizeof *args;
364 		return 0;
365 	}
366 
367 	update = mp->mnt_flag & MNT_UPDATE;
368 
369 	/* Check arguments */
370 	if (args->fspec != NULL) {
371 		/*
372 		 * Look up the name and verify that it's sane.
373 		 */
374 		error = namei_simple_user(args->fspec,
375 					NSM_FOLLOW_NOEMULROOT, &devvp);
376 		if (error != 0)
377 			return (error);
378 
379 		if (!update) {
380 			/*
381 			 * Be sure this is a valid block device
382 			 */
383 			if (devvp->v_type != VBLK)
384 				error = ENOTBLK;
385 			else if (bdevsw_lookup(devvp->v_rdev) == NULL)
386 				error = ENXIO;
387 		} else {
388 			/*
389 			 * Be sure we're still naming the same device
390 			 * used for our initial mount
391 			 */
392 			ump = VFSTOUFS(mp);
393 			if (devvp != ump->um_devvp) {
394 				if (devvp->v_rdev != ump->um_devvp->v_rdev)
395 					error = EINVAL;
396 				else {
397 					vrele(devvp);
398 					devvp = ump->um_devvp;
399 					vref(devvp);
400 				}
401 			}
402 		}
403 	} else {
404 		if (!update) {
405 			/* New mounts must have a filename for the device */
406 			return (EINVAL);
407 		} else {
408 			/* Use the extant mount */
409 			ump = VFSTOUFS(mp);
410 			devvp = ump->um_devvp;
411 			vref(devvp);
412 		}
413 	}
414 
415 	/*
416 	 * If mount by non-root, then verify that user has necessary
417 	 * permissions on the device.
418 	 *
419 	 * Permission to update a mount is checked higher, so here we presume
420 	 * updating the mount is okay (for example, as far as securelevel goes)
421 	 * which leaves us with the normal check.
422 	 */
423 	if (error == 0) {
424 		accessmode = VREAD;
425 		if (update ?
426 		    (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
427 		    (mp->mnt_flag & MNT_RDONLY) == 0)
428 			accessmode |= VWRITE;
429 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
430 		error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
431 		    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp,
432 		    KAUTH_ARG(accessmode));
433 		VOP_UNLOCK(devvp);
434 	}
435 
436 	if (error) {
437 		vrele(devvp);
438 		return (error);
439 	}
440 
441 #ifdef WAPBL
442 	/* WAPBL can only be enabled on a r/w mount. */
443 	if ((mp->mnt_flag & MNT_RDONLY) && !(mp->mnt_iflag & IMNT_WANTRDWR)) {
444 		mp->mnt_flag &= ~MNT_LOG;
445 	}
446 #else /* !WAPBL */
447 	mp->mnt_flag &= ~MNT_LOG;
448 #endif /* !WAPBL */
449 
450 	if (!update) {
451 		int xflags;
452 
453 		if (mp->mnt_flag & MNT_RDONLY)
454 			xflags = FREAD;
455 		else
456 			xflags = FREAD | FWRITE;
457 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
458 		error = VOP_OPEN(devvp, xflags, FSCRED);
459 		VOP_UNLOCK(devvp);
460 		if (error)
461 			goto fail;
462 		error = ffs_mountfs(devvp, mp, l);
463 		if (error) {
464 			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
465 			(void)VOP_CLOSE(devvp, xflags, NOCRED);
466 			VOP_UNLOCK(devvp);
467 			goto fail;
468 		}
469 
470 		ump = VFSTOUFS(mp);
471 		fs = ump->um_fs;
472 	} else {
473 		/*
474 		 * Update the mount.
475 		 */
476 
477 		/*
478 		 * The initial mount got a reference on this
479 		 * device, so drop the one obtained via
480 		 * namei(), above.
481 		 */
482 		vrele(devvp);
483 
484 		ump = VFSTOUFS(mp);
485 		fs = ump->um_fs;
486 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
487 			/*
488 			 * Changing from r/w to r/o
489 			 */
490 			flags = WRITECLOSE;
491 			if (mp->mnt_flag & MNT_FORCE)
492 				flags |= FORCECLOSE;
493 			error = ffs_flushfiles(mp, flags, l);
494 			if (error == 0)
495 				error = UFS_WAPBL_BEGIN(mp);
496 			if (error == 0 &&
497 			    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
498 			    fs->fs_clean & FS_WASCLEAN) {
499 				if (mp->mnt_flag & MNT_SOFTDEP)
500 					fs->fs_flags &= ~FS_DOSOFTDEP;
501 				fs->fs_clean = FS_ISCLEAN;
502 				(void) ffs_sbupdate(ump, MNT_WAIT);
503 			}
504 			if (error == 0)
505 				UFS_WAPBL_END(mp);
506 			if (error)
507 				return (error);
508 		}
509 
510 #ifdef WAPBL
511 		if ((mp->mnt_flag & MNT_LOG) == 0) {
512 			error = ffs_wapbl_stop(mp, mp->mnt_flag & MNT_FORCE);
513 			if (error)
514 				return error;
515 		}
516 #endif /* WAPBL */
517 
518 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
519 			/*
520 			 * Finish change from r/w to r/o
521 			 */
522 			fs->fs_ronly = 1;
523 			fs->fs_fmod = 0;
524 		}
525 
526 		if (mp->mnt_flag & MNT_RELOAD) {
527 			error = ffs_reload(mp, l->l_cred, l);
528 			if (error)
529 				return (error);
530 		}
531 
532 		if (fs->fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
533 			/*
534 			 * Changing from read-only to read/write
535 			 */
536 #ifndef QUOTA2
537 			if (fs->fs_flags & FS_DOQUOTA2) {
538 				ump->um_flags |= UFS_QUOTA2;
539 				uprintf("%s: options QUOTA2 not enabled%s\n",
540 				    mp->mnt_stat.f_mntonname,
541 				    (mp->mnt_flag & MNT_FORCE) ? "" :
542 				    ", not mounting");
543 				return EINVAL;
544 			}
545 #endif
546 			fs->fs_ronly = 0;
547 			fs->fs_clean <<= 1;
548 			fs->fs_fmod = 1;
549 #ifdef WAPBL
550 			if (fs->fs_flags & FS_DOWAPBL) {
551 				printf("%s: replaying log to disk\n",
552 				    mp->mnt_stat.f_mntonname);
553 				KDASSERT(mp->mnt_wapbl_replay);
554 				error = wapbl_replay_write(mp->mnt_wapbl_replay,
555 							   devvp);
556 				if (error) {
557 					return error;
558 				}
559 				wapbl_replay_stop(mp->mnt_wapbl_replay);
560 				fs->fs_clean = FS_WASCLEAN;
561 			}
562 #endif /* WAPBL */
563 			if (fs->fs_snapinum[0] != 0)
564 				ffs_snapshot_mount(mp);
565 		}
566 
567 #ifdef WAPBL
568 		error = ffs_wapbl_start(mp);
569 		if (error)
570 			return error;
571 #endif /* WAPBL */
572 
573 #ifdef QUOTA2
574 		if (!fs->fs_ronly) {
575 			error = ffs_quota2_mount(mp);
576 			if (error) {
577 				return error;
578 			}
579 		}
580 #endif
581 
582 		if ((mp->mnt_flag & MNT_DISCARD) && !(ump->um_discarddata))
583 			ump->um_discarddata = ffs_discard_init(devvp, fs);
584 
585 		if (args->fspec == NULL)
586 			return 0;
587 	}
588 
589 	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
590 	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
591 	if (error == 0)
592 		(void)strncpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname,
593 		    sizeof(fs->fs_fsmnt));
594 	fs->fs_flags &= ~FS_DOSOFTDEP;
595 	if (fs->fs_fmod != 0) {	/* XXX */
596 		int err;
597 
598 		fs->fs_fmod = 0;
599 		if (fs->fs_clean & FS_WASCLEAN)
600 			fs->fs_time = time_second;
601 		else {
602 			printf("%s: file system not clean (fs_clean=%#x); "
603 			    "please fsck(8)\n", mp->mnt_stat.f_mntfromname,
604 			    fs->fs_clean);
605 			printf("%s: lost blocks %" PRId64 " files %d\n",
606 			    mp->mnt_stat.f_mntfromname, fs->fs_pendingblocks,
607 			    fs->fs_pendinginodes);
608 		}
609 		err = UFS_WAPBL_BEGIN(mp);
610 		if (err == 0) {
611 			(void) ffs_cgupdate(ump, MNT_WAIT);
612 			UFS_WAPBL_END(mp);
613 		}
614 	}
615 	if ((mp->mnt_flag & MNT_SOFTDEP) != 0) {
616 		printf("%s: `-o softdep' is no longer supported, "
617 		    "consider `-o log'\n", mp->mnt_stat.f_mntfromname);
618 		mp->mnt_flag &= ~MNT_SOFTDEP;
619 	}
620 
621 	return (error);
622 
623 fail:
624 	vrele(devvp);
625 	return (error);
626 }
627 
628 /*
629  * Reload all incore data for a filesystem (used after running fsck on
630  * the root filesystem and finding things to fix). The filesystem must
631  * be mounted read-only.
632  *
633  * Things to do to update the mount:
634  *	1) invalidate all cached meta-data.
635  *	2) re-read superblock from disk.
636  *	3) re-read summary information from disk.
637  *	4) invalidate all inactive vnodes.
638  *	5) invalidate all cached file data.
639  *	6) re-read inode data for all active vnodes.
640  */
641 int
642 ffs_reload(struct mount *mp, kauth_cred_t cred, struct lwp *l)
643 {
644 	struct vnode *vp, *mvp, *devvp;
645 	struct inode *ip;
646 	void *space;
647 	struct buf *bp;
648 	struct fs *fs, *newfs;
649 	struct dkwedge_info dkw;
650 	int i, bsize, blks, error;
651 	int32_t *lp;
652 	struct ufsmount *ump;
653 	daddr_t sblockloc;
654 
655 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
656 		return (EINVAL);
657 
658 	ump = VFSTOUFS(mp);
659 	/*
660 	 * Step 1: invalidate all cached meta-data.
661 	 */
662 	devvp = ump->um_devvp;
663 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
664 	error = vinvalbuf(devvp, 0, cred, l, 0, 0);
665 	VOP_UNLOCK(devvp);
666 	if (error)
667 		panic("ffs_reload: dirty1");
668 	/*
669 	 * Step 2: re-read superblock from disk.
670 	 */
671 	fs = ump->um_fs;
672 
673 	/* XXX we don't handle possibility that superblock moved. */
674 	error = bread(devvp, fs->fs_sblockloc / DEV_BSIZE, fs->fs_sbsize,
675 		      NOCRED, 0, &bp);
676 	if (error) {
677 		return (error);
678 	}
679 	newfs = kmem_alloc(fs->fs_sbsize, KM_SLEEP);
680 	memcpy(newfs, bp->b_data, fs->fs_sbsize);
681 #ifdef FFS_EI
682 	if (ump->um_flags & UFS_NEEDSWAP) {
683 		ffs_sb_swap((struct fs*)bp->b_data, newfs);
684 		fs->fs_flags |= FS_SWAPPED;
685 	} else
686 #endif
687 		fs->fs_flags &= ~FS_SWAPPED;
688 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
689 	     newfs->fs_magic != FS_UFS2_MAGIC)||
690 	     newfs->fs_bsize > MAXBSIZE ||
691 	     newfs->fs_bsize < sizeof(struct fs)) {
692 		brelse(bp, 0);
693 		kmem_free(newfs, fs->fs_sbsize);
694 		return (EIO);		/* XXX needs translation */
695 	}
696 	/* Store off old fs_sblockloc for fs_oldfscompat_read. */
697 	sblockloc = fs->fs_sblockloc;
698 	/*
699 	 * Copy pointer fields back into superblock before copying in	XXX
700 	 * new superblock. These should really be in the ufsmount.	XXX
701 	 * Note that important parameters (eg fs_ncg) are unchanged.
702 	 */
703 	newfs->fs_csp = fs->fs_csp;
704 	newfs->fs_maxcluster = fs->fs_maxcluster;
705 	newfs->fs_contigdirs = fs->fs_contigdirs;
706 	newfs->fs_ronly = fs->fs_ronly;
707 	newfs->fs_active = fs->fs_active;
708 	memcpy(fs, newfs, (u_int)fs->fs_sbsize);
709 	brelse(bp, 0);
710 	kmem_free(newfs, fs->fs_sbsize);
711 
712 	/* Recheck for apple UFS filesystem */
713 	ump->um_flags &= ~UFS_ISAPPLEUFS;
714 	/* First check to see if this is tagged as an Apple UFS filesystem
715 	 * in the disklabel
716 	 */
717 	if (getdiskinfo(devvp, &dkw) == 0 &&
718 	    strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0)
719 		ump->um_flags |= UFS_ISAPPLEUFS;
720 #ifdef APPLE_UFS
721 	else {
722 		/* Manually look for an apple ufs label, and if a valid one
723 		 * is found, then treat it like an Apple UFS filesystem anyway
724 		 *
725 		 * EINVAL is most probably a blocksize or alignment problem,
726 		 * it is unlikely that this is an Apple UFS filesystem then.
727 		 */
728 		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
729 			APPLEUFS_LABEL_SIZE, cred, 0, &bp);
730 		if (error && error != EINVAL) {
731 			return (error);
732 		}
733 		if (error == 0) {
734 			error = ffs_appleufs_validate(fs->fs_fsmnt,
735 				(struct appleufslabel *)bp->b_data, NULL);
736 			if (error == 0)
737 				ump->um_flags |= UFS_ISAPPLEUFS;
738 			brelse(bp, 0);
739 		}
740 		bp = NULL;
741 	}
742 #else
743 	if (ump->um_flags & UFS_ISAPPLEUFS)
744 		return (EIO);
745 #endif
746 
747 	if (UFS_MPISAPPLEUFS(ump)) {
748 		/* see comment about NeXT below */
749 		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
750 		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
751 		mp->mnt_iflag |= IMNT_DTYPE;
752 	} else {
753 		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
754 		ump->um_dirblksiz = UFS_DIRBLKSIZ;
755 		if (ump->um_maxsymlinklen > 0)
756 			mp->mnt_iflag |= IMNT_DTYPE;
757 		else
758 			mp->mnt_iflag &= ~IMNT_DTYPE;
759 	}
760 	ffs_oldfscompat_read(fs, ump, sblockloc);
761 
762 	mutex_enter(&ump->um_lock);
763 	ump->um_maxfilesize = fs->fs_maxfilesize;
764 	if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
765 		uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
766 		    mp->mnt_stat.f_mntonname, fs->fs_flags,
767 		    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
768 		if ((mp->mnt_flag & MNT_FORCE) == 0) {
769 			mutex_exit(&ump->um_lock);
770 			return (EINVAL);
771 		}
772 	}
773 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
774 		fs->fs_pendingblocks = 0;
775 		fs->fs_pendinginodes = 0;
776 	}
777 	mutex_exit(&ump->um_lock);
778 
779 	ffs_statvfs(mp, &mp->mnt_stat);
780 	/*
781 	 * Step 3: re-read summary information from disk.
782 	 */
783 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
784 	space = fs->fs_csp;
785 	for (i = 0; i < blks; i += fs->fs_frag) {
786 		bsize = fs->fs_bsize;
787 		if (i + fs->fs_frag > blks)
788 			bsize = (blks - i) * fs->fs_fsize;
789 		error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize,
790 			      NOCRED, 0, &bp);
791 		if (error) {
792 			return (error);
793 		}
794 #ifdef FFS_EI
795 		if (UFS_FSNEEDSWAP(fs))
796 			ffs_csum_swap((struct csum *)bp->b_data,
797 			    (struct csum *)space, bsize);
798 		else
799 #endif
800 			memcpy(space, bp->b_data, (size_t)bsize);
801 		space = (char *)space + bsize;
802 		brelse(bp, 0);
803 	}
804 	/*
805 	 * We no longer know anything about clusters per cylinder group.
806 	 */
807 	if (fs->fs_contigsumsize > 0) {
808 		lp = fs->fs_maxcluster;
809 		for (i = 0; i < fs->fs_ncg; i++)
810 			*lp++ = fs->fs_contigsumsize;
811 	}
812 
813 	/* Allocate a marker vnode. */
814 	mvp = vnalloc(mp);
815 	/*
816 	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
817 	 * and vclean() can be called indirectly
818 	 */
819 	mutex_enter(&mntvnode_lock);
820  loop:
821 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
822 		vmark(mvp, vp);
823 		if (vp->v_mount != mp || vismarker(vp))
824 			continue;
825 		/*
826 		 * Step 4: invalidate all inactive vnodes.
827 		 */
828 		if (vrecycle(vp, &mntvnode_lock)) {
829 			mutex_enter(&mntvnode_lock);
830 			(void)vunmark(mvp);
831 			goto loop;
832 		}
833 		/*
834 		 * Step 5: invalidate all cached file data.
835 		 */
836 		mutex_enter(vp->v_interlock);
837 		mutex_exit(&mntvnode_lock);
838 		if (vget(vp, LK_EXCLUSIVE)) {
839 			(void)vunmark(mvp);
840 			goto loop;
841 		}
842 		if (vinvalbuf(vp, 0, cred, l, 0, 0))
843 			panic("ffs_reload: dirty2");
844 		/*
845 		 * Step 6: re-read inode data for all active vnodes.
846 		 */
847 		ip = VTOI(vp);
848 		error = bread(devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ip->i_number)),
849 			      (int)fs->fs_bsize, NOCRED, 0, &bp);
850 		if (error) {
851 			vput(vp);
852 			(void)vunmark(mvp);
853 			break;
854 		}
855 		ffs_load_inode(bp, ip, fs, ip->i_number);
856 		brelse(bp, 0);
857 		vput(vp);
858 		mutex_enter(&mntvnode_lock);
859 	}
860 	mutex_exit(&mntvnode_lock);
861 	vnfree(mvp);
862 	return (error);
863 }
864 
865 /*
866  * Possible superblock locations ordered from most to least likely.
 *
 * The entries are byte offsets on the device: ffs_mountfs() divides each
 * one by DEV_BSIZE before calling bread() and treats an entry of -1 as
 * the end of the list.
867  */
868 static const int sblock_try[] = SBLOCKSEARCH;
869 
870 /*
871  * Common code for mount and mountroot
872  */
873 int
874 ffs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l)
875 {
876 	struct ufsmount *ump;
877 	struct buf *bp;
878 	struct fs *fs;
879 	dev_t dev;
880 	struct dkwedge_info dkw;
881 	void *space;
882 	daddr_t sblockloc, fsblockloc;
883 	int blks, fstype;
884 	int error, i, bsize, ronly, bset = 0;
885 #ifdef FFS_EI
886 	int needswap = 0;		/* keep gcc happy */
887 #endif
888 	int32_t *lp;
889 	kauth_cred_t cred;
890 	u_int32_t sbsize = 8192;	/* keep gcc happy*/
891 	u_int32_t allocsbsize;
892 	int32_t fsbsize;
893 
894 	dev = devvp->v_rdev;
895 	cred = l ? l->l_cred : NOCRED;
896 
897 	/* Flush out any old buffers remaining from a previous use. */
898 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
899 	error = vinvalbuf(devvp, V_SAVE, cred, l, 0, 0);
900 	VOP_UNLOCK(devvp);
901 	if (error)
902 		return (error);
903 
904 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
905 
906 	bp = NULL;
907 	ump = NULL;
908 	fs = NULL;
909 	sblockloc = 0;
910 	fstype = 0;
911 
912 	error = fstrans_mount(mp);
913 	if (error)
914 		return error;
915 
916 	ump = kmem_zalloc(sizeof(*ump), KM_SLEEP);
917 	mutex_init(&ump->um_lock, MUTEX_DEFAULT, IPL_NONE);
918 	error = ffs_snapshot_init(ump);
919 	if (error)
920 		goto out;
921 	ump->um_ops = &ffs_ufsops;
922 
923 #ifdef WAPBL
924  sbagain:
925 #endif
926 	/*
927 	 * Try reading the superblock in each of its possible locations.
928 	 */
929 	for (i = 0; ; i++) {
930 		if (bp != NULL) {
931 			brelse(bp, BC_NOCACHE);
932 			bp = NULL;
933 		}
934 		if (sblock_try[i] == -1) {
935 			error = EINVAL;
936 			fs = NULL;
937 			goto out;
938 		}
939 		error = bread(devvp, sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, cred,
940 			      0, &bp);
941 		if (error) {
942 			fs = NULL;
943 			goto out;
944 		}
945 		fs = (struct fs*)bp->b_data;
946 		fsblockloc = sblockloc = sblock_try[i];
947 		if (fs->fs_magic == FS_UFS1_MAGIC) {
948 			sbsize = fs->fs_sbsize;
949 			fstype = UFS1;
950 			fsbsize = fs->fs_bsize;
951 #ifdef FFS_EI
952 			needswap = 0;
953 		} else if (fs->fs_magic == FS_UFS1_MAGIC_SWAPPED) {
954 			sbsize = bswap32(fs->fs_sbsize);
955 			fstype = UFS1;
956 			fsbsize = bswap32(fs->fs_bsize);
957 			needswap = 1;
958 #endif
959 		} else if (fs->fs_magic == FS_UFS2_MAGIC) {
960 			sbsize = fs->fs_sbsize;
961 			fstype = UFS2;
962 			fsbsize = fs->fs_bsize;
963 #ifdef FFS_EI
964 			needswap = 0;
965 		} else if (fs->fs_magic == FS_UFS2_MAGIC_SWAPPED) {
966 			sbsize = bswap32(fs->fs_sbsize);
967 			fstype = UFS2;
968 			fsbsize = bswap32(fs->fs_bsize);
969 			needswap = 1;
970 #endif
971 		} else
972 			continue;
973 
974 
975 		/* fs->fs_sblockloc isn't defined for old filesystems */
976 		if (fstype == UFS1 && !(fs->fs_old_flags & FS_FLAGS_UPDATED)) {
977 			if (sblockloc == SBLOCK_UFS2)
978 				/*
979 				 * This is likely to be the first alternate
980 				 * in a filesystem with 64k blocks.
981 				 * Don't use it.
982 				 */
983 				continue;
984 			fsblockloc = sblockloc;
985 		} else {
986 			fsblockloc = fs->fs_sblockloc;
987 #ifdef FFS_EI
988 			if (needswap)
989 				fsblockloc = bswap64(fsblockloc);
990 #endif
991 		}
992 
993 		/* Check we haven't found an alternate superblock */
994 		if (fsblockloc != sblockloc)
995 			continue;
996 
997 		/* Validate size of superblock */
998 		if (sbsize > MAXBSIZE || sbsize < sizeof(struct fs))
999 			continue;
1000 
1001 		/* Check that we can handle the file system blocksize */
1002 		if (fsbsize > MAXBSIZE) {
1003 			printf("ffs_mountfs: block size (%d) > MAXBSIZE (%d)\n",
1004 			    fsbsize, MAXBSIZE);
1005 			continue;
1006 		}
1007 
1008 		/* Ok seems to be a good superblock */
1009 		break;
1010 	}
1011 
1012 	fs = kmem_alloc((u_long)sbsize, KM_SLEEP);
1013 	memcpy(fs, bp->b_data, sbsize);
1014 	ump->um_fs = fs;
1015 
1016 #ifdef FFS_EI
1017 	if (needswap) {
1018 		ffs_sb_swap((struct fs*)bp->b_data, fs);
1019 		fs->fs_flags |= FS_SWAPPED;
1020 	} else
1021 #endif
1022 		fs->fs_flags &= ~FS_SWAPPED;
1023 
1024 #ifdef WAPBL
1025 	if ((mp->mnt_wapbl_replay == 0) && (fs->fs_flags & FS_DOWAPBL)) {
1026 		error = ffs_wapbl_replay_start(mp, fs, devvp);
1027 		if (error && (mp->mnt_flag & MNT_FORCE) == 0)
1028 			goto out;
1029 		if (!error) {
1030 			if (!ronly) {
1031 				/* XXX fsmnt may be stale. */
1032 				printf("%s: replaying log to disk\n",
1033 				    fs->fs_fsmnt);
1034 				error = wapbl_replay_write(mp->mnt_wapbl_replay,
1035 				    devvp);
1036 				if (error)
1037 					goto out;
1038 				wapbl_replay_stop(mp->mnt_wapbl_replay);
1039 				fs->fs_clean = FS_WASCLEAN;
1040 			} else {
1041 				/* XXX fsmnt may be stale */
1042 				printf("%s: replaying log to memory\n",
1043 				    fs->fs_fsmnt);
1044 			}
1045 
1046 			/* Force a re-read of the superblock */
1047 			brelse(bp, BC_INVAL);
1048 			bp = NULL;
1049 			kmem_free(fs, sbsize);
1050 			fs = NULL;
1051 			goto sbagain;
1052 		}
1053 	}
1054 #else /* !WAPBL */
1055 	if ((fs->fs_flags & FS_DOWAPBL) && (mp->mnt_flag & MNT_FORCE) == 0) {
1056 		error = EPERM;
1057 		goto out;
1058 	}
1059 #endif /* !WAPBL */
1060 
1061 	ffs_oldfscompat_read(fs, ump, sblockloc);
1062 	ump->um_maxfilesize = fs->fs_maxfilesize;
1063 
1064 	if (fs->fs_flags & ~(FS_KNOWN_FLAGS | FS_INTERNAL)) {
1065 		uprintf("%s: unknown ufs flags: 0x%08"PRIx32"%s\n",
1066 		    mp->mnt_stat.f_mntonname, fs->fs_flags,
1067 		    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1068 		if ((mp->mnt_flag & MNT_FORCE) == 0) {
1069 			error = EINVAL;
1070 			goto out;
1071 		}
1072 	}
1073 
1074 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
1075 		fs->fs_pendingblocks = 0;
1076 		fs->fs_pendinginodes = 0;
1077 	}
1078 
1079 	ump->um_fstype = fstype;
1080 	if (fs->fs_sbsize < SBLOCKSIZE)
1081 		brelse(bp, BC_INVAL);
1082 	else
1083 		brelse(bp, 0);
1084 	bp = NULL;
1085 
1086 	/* First check to see if this is tagged as an Apple UFS filesystem
1087 	 * in the disklabel
1088 	 */
1089 	if (getdiskinfo(devvp, &dkw) == 0 &&
1090 	    strcmp(dkw.dkw_ptype, DKW_PTYPE_APPLEUFS) == 0)
1091 		ump->um_flags |= UFS_ISAPPLEUFS;
1092 #ifdef APPLE_UFS
1093 	else {
1094 		/* Manually look for an apple ufs label, and if a valid one
1095 		 * is found, then treat it like an Apple UFS filesystem anyway
1096 		 */
1097 		error = bread(devvp, (daddr_t)(APPLEUFS_LABEL_OFFSET / DEV_BSIZE),
1098 			APPLEUFS_LABEL_SIZE, cred, 0, &bp);
1099 		if (error)
1100 			goto out;
1101 		error = ffs_appleufs_validate(fs->fs_fsmnt,
1102 			(struct appleufslabel *)bp->b_data, NULL);
1103 		if (error == 0) {
1104 			ump->um_flags |= UFS_ISAPPLEUFS;
1105 		}
1106 		brelse(bp, 0);
1107 		bp = NULL;
1108 	}
1109 #else
1110 	if (ump->um_flags & UFS_ISAPPLEUFS) {
1111 		error = EINVAL;
1112 		goto out;
1113 	}
1114 #endif
1115 
1116 #if 0
1117 /*
1118  * XXX This code changes the behaviour of mounting dirty filesystems, to
1119  * XXX require "mount -f ..." to mount them.  This doesn't match what
1120  * XXX mount(8) describes and is disabled for now.
1121  */
1122 	/*
1123 	 * If the file system is not clean, don't allow it to be mounted
1124 	 * unless MNT_FORCE is specified.  (Note: MNT_FORCE is always set
1125 	 * for the root file system.)
1126 	 */
1127 	if (fs->fs_flags & FS_DOWAPBL) {
1128 		/*
1129 		 * wapbl normally expects to be FS_WASCLEAN when the FS_DOWAPBL
1130 		 * bit is set, although there's a window in unmount where it
1131 		 * could be FS_ISCLEAN
1132 		 */
1133 		if ((mp->mnt_flag & MNT_FORCE) == 0 &&
1134 		    (fs->fs_clean & (FS_WASCLEAN | FS_ISCLEAN)) == 0) {
1135 			error = EPERM;
1136 			goto out;
1137 		}
1138 	} else
1139 		if ((fs->fs_clean & FS_ISCLEAN) == 0 &&
1140 		    (mp->mnt_flag & MNT_FORCE) == 0) {
1141 			error = EPERM;
1142 			goto out;
1143 		}
1144 #endif
1145 
1146 	/*
1147 	 * verify that we can access the last block in the fs
1148 	 * if we're mounting read/write.
1149 	 */
1150 
1151 	if (!ronly) {
1152 		error = bread(devvp, FFS_FSBTODB(fs, fs->fs_size - 1), fs->fs_fsize,
1153 		    cred, 0, &bp);
1154 		if (bp->b_bcount != fs->fs_fsize)
1155 			error = EINVAL;
1156 		if (error) {
1157 			bset = BC_INVAL;
1158 			goto out;
1159 		}
1160 		brelse(bp, BC_INVAL);
1161 		bp = NULL;
1162 	}
1163 
1164 	fs->fs_ronly = ronly;
1165 	/* Don't bump fs_clean if we're replaying journal */
1166 	if (!((fs->fs_flags & FS_DOWAPBL) && (fs->fs_clean & FS_WASCLEAN)))
1167 		if (ronly == 0) {
1168 			fs->fs_clean <<= 1;
1169 			fs->fs_fmod = 1;
1170 		}
1171 	bsize = fs->fs_cssize;
1172 	blks = howmany(bsize, fs->fs_fsize);
1173 	if (fs->fs_contigsumsize > 0)
1174 		bsize += fs->fs_ncg * sizeof(int32_t);
1175 	bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1176 	allocsbsize = bsize;
1177 	space = kmem_alloc((u_long)allocsbsize, KM_SLEEP);
1178 	fs->fs_csp = space;
1179 	for (i = 0; i < blks; i += fs->fs_frag) {
1180 		bsize = fs->fs_bsize;
1181 		if (i + fs->fs_frag > blks)
1182 			bsize = (blks - i) * fs->fs_fsize;
1183 		error = bread(devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i), bsize,
1184 			      cred, 0, &bp);
1185 		if (error) {
1186 			kmem_free(fs->fs_csp, allocsbsize);
1187 			goto out;
1188 		}
1189 #ifdef FFS_EI
1190 		if (needswap)
1191 			ffs_csum_swap((struct csum *)bp->b_data,
1192 				(struct csum *)space, bsize);
1193 		else
1194 #endif
1195 			memcpy(space, bp->b_data, (u_int)bsize);
1196 
1197 		space = (char *)space + bsize;
1198 		brelse(bp, 0);
1199 		bp = NULL;
1200 	}
1201 	if (fs->fs_contigsumsize > 0) {
1202 		fs->fs_maxcluster = lp = space;
1203 		for (i = 0; i < fs->fs_ncg; i++)
1204 			*lp++ = fs->fs_contigsumsize;
1205 		space = lp;
1206 	}
1207 	bsize = fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1208 	fs->fs_contigdirs = space;
1209 	space = (char *)space + bsize;
1210 	memset(fs->fs_contigdirs, 0, bsize);
1211 		/* Compatibility for old filesystems - XXX */
1212 	if (fs->fs_avgfilesize <= 0)
1213 		fs->fs_avgfilesize = AVFILESIZ;
1214 	if (fs->fs_avgfpdir <= 0)
1215 		fs->fs_avgfpdir = AFPDIR;
1216 	fs->fs_active = NULL;
1217 	mp->mnt_data = ump;
1218 	mp->mnt_stat.f_fsidx.__fsid_val[0] = (long)dev;
1219 	mp->mnt_stat.f_fsidx.__fsid_val[1] = makefstype(MOUNT_FFS);
1220 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
1221 	mp->mnt_stat.f_namemax = FFS_MAXNAMLEN;
1222 	if (UFS_MPISAPPLEUFS(ump)) {
1223 		/* NeXT used to keep short symlinks in the inode even
1224 		 * when using FS_42INODEFMT.  In that case fs->fs_maxsymlinklen
1225 		 * is probably -1, but we still need to be able to identify
1226 		 * short symlinks.
1227 		 */
1228 		ump->um_maxsymlinklen = APPLEUFS_MAXSYMLINKLEN;
1229 		ump->um_dirblksiz = APPLEUFS_DIRBLKSIZ;
1230 		mp->mnt_iflag |= IMNT_DTYPE;
1231 	} else {
1232 		ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
1233 		ump->um_dirblksiz = UFS_DIRBLKSIZ;
1234 		if (ump->um_maxsymlinklen > 0)
1235 			mp->mnt_iflag |= IMNT_DTYPE;
1236 		else
1237 			mp->mnt_iflag &= ~IMNT_DTYPE;
1238 	}
1239 	mp->mnt_fs_bshift = fs->fs_bshift;
1240 	mp->mnt_dev_bshift = DEV_BSHIFT;	/* XXX */
1241 	mp->mnt_flag |= MNT_LOCAL;
1242 	mp->mnt_iflag |= IMNT_MPSAFE;
1243 #ifdef FFS_EI
1244 	if (needswap)
1245 		ump->um_flags |= UFS_NEEDSWAP;
1246 #endif
1247 	ump->um_mountp = mp;
1248 	ump->um_dev = dev;
1249 	ump->um_devvp = devvp;
1250 	ump->um_nindir = fs->fs_nindir;
1251 	ump->um_lognindir = ffs(fs->fs_nindir) - 1;
1252 	ump->um_bptrtodb = fs->fs_fshift - DEV_BSHIFT;
1253 	ump->um_seqinc = fs->fs_frag;
1254 	for (i = 0; i < MAXQUOTAS; i++)
1255 		ump->um_quotas[i] = NULLVP;
1256 	spec_node_setmountedfs(devvp, mp);
1257 	if (ronly == 0 && fs->fs_snapinum[0] != 0)
1258 		ffs_snapshot_mount(mp);
1259 #ifdef WAPBL
1260 	if (!ronly) {
1261 		KDASSERT(fs->fs_ronly == 0);
1262 		/*
1263 		 * ffs_wapbl_start() needs mp->mnt_stat initialised if it
1264 		 * needs to create a new log file in-filesystem.
1265 		 */
1266 		ffs_statvfs(mp, &mp->mnt_stat);
1267 
1268 		error = ffs_wapbl_start(mp);
1269 		if (error) {
1270 			kmem_free(fs->fs_csp, allocsbsize);
1271 			goto out;
1272 		}
1273 	}
1274 #endif /* WAPBL */
1275 	if (ronly == 0) {
1276 #ifdef QUOTA2
1277 		error = ffs_quota2_mount(mp);
1278 		if (error) {
1279 			kmem_free(fs->fs_csp, allocsbsize);
1280 			goto out;
1281 		}
1282 #else
1283 		if (fs->fs_flags & FS_DOQUOTA2) {
1284 			ump->um_flags |= UFS_QUOTA2;
1285 			uprintf("%s: options QUOTA2 not enabled%s\n",
1286 			    mp->mnt_stat.f_mntonname,
1287 			    (mp->mnt_flag & MNT_FORCE) ? "" : ", not mounting");
1288 			if ((mp->mnt_flag & MNT_FORCE) == 0) {
1289 				error = EINVAL;
1290 				kmem_free(fs->fs_csp, allocsbsize);
1291 				goto out;
1292 			}
1293 		}
1294 #endif
1295 	 }
1296 #ifdef UFS_EXTATTR
1297 	/*
1298 	 * Initialize file-backed extended attributes on UFS1 file
1299 	 * systems.
1300 	 */
1301 	if (ump->um_fstype == UFS1)
1302 		ufs_extattr_uepm_init(&ump->um_extattr);
1303 #endif /* UFS_EXTATTR */
1304 
1305 	if (mp->mnt_flag & MNT_DISCARD)
1306 		ump->um_discarddata = ffs_discard_init(devvp, fs);
1307 
1308 	return (0);
1309 out:
1310 #ifdef WAPBL
1311 	if (mp->mnt_wapbl_replay) {
1312 		wapbl_replay_stop(mp->mnt_wapbl_replay);
1313 		wapbl_replay_free(mp->mnt_wapbl_replay);
1314 		mp->mnt_wapbl_replay = 0;
1315 	}
1316 #endif
1317 
1318 	fstrans_unmount(mp);
1319 	if (fs)
1320 		kmem_free(fs, fs->fs_sbsize);
1321 	spec_node_setmountedfs(devvp, NULL);
1322 	if (bp)
1323 		brelse(bp, bset);
1324 	if (ump) {
1325 		if (ump->um_oldfscompat)
1326 			kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t));
1327 		mutex_destroy(&ump->um_lock);
1328 		kmem_free(ump, sizeof(*ump));
1329 		mp->mnt_data = NULL;
1330 	}
1331 	return (error);
1332 }
1333 
1334 /*
1335  * Sanity checks for loading old filesystem superblocks.
1336  * See ffs_oldfscompat_write below for unwound actions.
1337  *
1338  * XXX - Parts get retired eventually.
1339  * Unfortunately new bits get added.
1340  */
1341 static void
1342 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump, daddr_t sblockloc)
1343 {
1344 	off_t maxfilesize;
1345 	int32_t *extrasave;
1346 
1347 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1348 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1349 		return;
1350 
1351 	if (!ump->um_oldfscompat)
1352 		ump->um_oldfscompat = kmem_alloc(512 + 3*sizeof(int32_t),
1353 		    KM_SLEEP);
1354 
1355 	memcpy(ump->um_oldfscompat, &fs->fs_old_postbl_start, 512);
1356 	extrasave = ump->um_oldfscompat;
1357 	extrasave += 512/sizeof(int32_t);
1358 	extrasave[0] = fs->fs_old_npsect;
1359 	extrasave[1] = fs->fs_old_interleave;
1360 	extrasave[2] = fs->fs_old_trackskew;
1361 
1362 	/* These fields will be overwritten by their
1363 	 * original values in fs_oldfscompat_write, so it is harmless
1364 	 * to modify them here.
1365 	 */
1366 	fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
1367 	fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
1368 	fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
1369 	fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
1370 
1371 	fs->fs_maxbsize = fs->fs_bsize;
1372 	fs->fs_time = fs->fs_old_time;
1373 	fs->fs_size = fs->fs_old_size;
1374 	fs->fs_dsize = fs->fs_old_dsize;
1375 	fs->fs_csaddr = fs->fs_old_csaddr;
1376 	fs->fs_sblockloc = sblockloc;
1377 
1378 	fs->fs_flags = fs->fs_old_flags | (fs->fs_flags & FS_INTERNAL);
1379 
1380 	if (fs->fs_old_postblformat == FS_42POSTBLFMT) {
1381 		fs->fs_old_nrpos = 8;
1382 		fs->fs_old_npsect = fs->fs_old_nsect;
1383 		fs->fs_old_interleave = 1;
1384 		fs->fs_old_trackskew = 0;
1385 	}
1386 
1387 	if (fs->fs_old_inodefmt < FS_44INODEFMT) {
1388 		fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
1389 		fs->fs_qbmask = ~fs->fs_bmask;
1390 		fs->fs_qfmask = ~fs->fs_fmask;
1391 	}
1392 
1393 	maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1;
1394 	if (fs->fs_maxfilesize > maxfilesize)
1395 		fs->fs_maxfilesize = maxfilesize;
1396 
1397 	/* Compatibility for old filesystems */
1398 	if (fs->fs_avgfilesize <= 0)
1399 		fs->fs_avgfilesize = AVFILESIZ;
1400 	if (fs->fs_avgfpdir <= 0)
1401 		fs->fs_avgfpdir = AFPDIR;
1402 
1403 #if 0
1404 	if (bigcgs) {
1405 		fs->fs_save_cgsize = fs->fs_cgsize;
1406 		fs->fs_cgsize = fs->fs_bsize;
1407 	}
1408 #endif
1409 }
1410 
1411 /*
1412  * Unwinding superblock updates for old filesystems.
1413  * See ffs_oldfscompat_read above for details.
1414  *
1415  * XXX - Parts get retired eventually.
1416  * Unfortunately new bits get added.
1417  */
1418 static void
1419 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
1420 {
1421 	int32_t *extrasave;
1422 
1423 	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
1424 	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
1425 		return;
1426 
1427 	fs->fs_old_time = fs->fs_time;
1428 	fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
1429 	fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
1430 	fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
1431 	fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
1432 	fs->fs_old_flags = fs->fs_flags;
1433 
1434 #if 0
1435 	if (bigcgs) {
1436 		fs->fs_cgsize = fs->fs_save_cgsize;
1437 	}
1438 #endif
1439 
1440 	memcpy(&fs->fs_old_postbl_start, ump->um_oldfscompat, 512);
1441 	extrasave = ump->um_oldfscompat;
1442 	extrasave += 512/sizeof(int32_t);
1443 	fs->fs_old_npsect = extrasave[0];
1444 	fs->fs_old_interleave = extrasave[1];
1445 	fs->fs_old_trackskew = extrasave[2];
1446 
1447 }
1448 
1449 /*
1450  * unmount vfs operation
1451  */
1452 int
1453 ffs_unmount(struct mount *mp, int mntflags)
1454 {
1455 	struct lwp *l = curlwp;
1456 	struct ufsmount *ump = VFSTOUFS(mp);
1457 	struct fs *fs = ump->um_fs;
1458 	int error, flags;
1459 	u_int32_t bsize;
1460 #ifdef WAPBL
1461 	extern int doforce;
1462 #endif
1463 
1464 	if (ump->um_discarddata) {
1465 		ffs_discard_finish(ump->um_discarddata, mntflags);
1466 		ump->um_discarddata = NULL;
1467 	}
1468 
1469 	flags = 0;
1470 	if (mntflags & MNT_FORCE)
1471 		flags |= FORCECLOSE;
1472 	if ((error = ffs_flushfiles(mp, flags, l)) != 0)
1473 		return (error);
1474 	error = UFS_WAPBL_BEGIN(mp);
1475 	if (error == 0)
1476 		if (fs->fs_ronly == 0 &&
1477 		    ffs_cgupdate(ump, MNT_WAIT) == 0 &&
1478 		    fs->fs_clean & FS_WASCLEAN) {
1479 			fs->fs_clean = FS_ISCLEAN;
1480 			fs->fs_fmod = 0;
1481 			(void) ffs_sbupdate(ump, MNT_WAIT);
1482 		}
1483 	if (error == 0)
1484 		UFS_WAPBL_END(mp);
1485 #ifdef WAPBL
1486 	KASSERT(!(mp->mnt_wapbl_replay && mp->mnt_wapbl));
1487 	if (mp->mnt_wapbl_replay) {
1488 		KDASSERT(fs->fs_ronly);
1489 		wapbl_replay_stop(mp->mnt_wapbl_replay);
1490 		wapbl_replay_free(mp->mnt_wapbl_replay);
1491 		mp->mnt_wapbl_replay = 0;
1492 	}
1493 	error = ffs_wapbl_stop(mp, doforce && (mntflags & MNT_FORCE));
1494 	if (error) {
1495 		return error;
1496 	}
1497 #endif /* WAPBL */
1498 
1499 	if (ump->um_devvp->v_type != VBAD)
1500 		spec_node_setmountedfs(ump->um_devvp, NULL);
1501 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1502 	(void)VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD | FWRITE,
1503 		NOCRED);
1504 	vput(ump->um_devvp);
1505 
1506 	bsize = fs->fs_cssize;
1507 	if (fs->fs_contigsumsize > 0)
1508 		bsize += fs->fs_ncg * sizeof(int32_t);
1509 	bsize += fs->fs_ncg * sizeof(*fs->fs_contigdirs);
1510 	kmem_free(fs->fs_csp, bsize);
1511 
1512 	kmem_free(fs, fs->fs_sbsize);
1513 	if (ump->um_oldfscompat != NULL)
1514 		kmem_free(ump->um_oldfscompat, 512 + 3*sizeof(int32_t));
1515 	mutex_destroy(&ump->um_lock);
1516 	ffs_snapshot_fini(ump);
1517 	kmem_free(ump, sizeof(*ump));
1518 	mp->mnt_data = NULL;
1519 	mp->mnt_flag &= ~MNT_LOCAL;
1520 	fstrans_unmount(mp);
1521 	return (0);
1522 }
1523 
1524 /*
1525  * Flush out all the files in a filesystem.
1526  */
1527 int
1528 ffs_flushfiles(struct mount *mp, int flags, struct lwp *l)
1529 {
1530 	extern int doforce;
1531 	struct ufsmount *ump;
1532 	int error;
1533 
1534 	if (!doforce)
1535 		flags &= ~FORCECLOSE;
1536 	ump = VFSTOUFS(mp);
1537 #ifdef QUOTA
1538 	if ((error = quota1_umount(mp, flags)) != 0)
1539 		return (error);
1540 #endif
1541 #ifdef QUOTA2
1542 	if ((error = quota2_umount(mp, flags)) != 0)
1543 		return (error);
1544 #endif
1545 #ifdef UFS_EXTATTR
1546 	if (ump->um_fstype == UFS1) {
1547 		if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_STARTED)
1548 			ufs_extattr_stop(mp, l);
1549 		if (ump->um_extattr.uepm_flags & UFS_EXTATTR_UEPM_INITIALIZED)
1550 			ufs_extattr_uepm_destroy(&ump->um_extattr);
1551 	}
1552 #endif
1553 	if ((error = vflush(mp, 0, SKIPSYSTEM | flags)) != 0)
1554 		return (error);
1555 	ffs_snapshot_unmount(mp);
1556 	/*
1557 	 * Flush all the files.
1558 	 */
1559 	error = vflush(mp, NULLVP, flags);
1560 	if (error)
1561 		return (error);
1562 	/*
1563 	 * Flush filesystem metadata.
1564 	 */
1565 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1566 	error = VOP_FSYNC(ump->um_devvp, l->l_cred, FSYNC_WAIT, 0, 0);
1567 	VOP_UNLOCK(ump->um_devvp);
1568 	if (flags & FORCECLOSE) /* XXXDBJ */
1569 		error = 0;
1570 
1571 #ifdef WAPBL
1572 	if (error)
1573 		return error;
1574 	if (mp->mnt_wapbl) {
1575 		error = wapbl_flush(mp->mnt_wapbl, 1);
1576 		if (flags & FORCECLOSE)
1577 			error = 0;
1578 	}
1579 #endif
1580 
1581 	return (error);
1582 }
1583 
1584 /*
1585  * Get file system statistics.
1586  */
1587 int
1588 ffs_statvfs(struct mount *mp, struct statvfs *sbp)
1589 {
1590 	struct ufsmount *ump;
1591 	struct fs *fs;
1592 
1593 	ump = VFSTOUFS(mp);
1594 	fs = ump->um_fs;
1595 	mutex_enter(&ump->um_lock);
1596 	sbp->f_bsize = fs->fs_bsize;
1597 	sbp->f_frsize = fs->fs_fsize;
1598 	sbp->f_iosize = fs->fs_bsize;
1599 	sbp->f_blocks = fs->fs_dsize;
1600 	sbp->f_bfree = ffs_blkstofrags(fs, fs->fs_cstotal.cs_nbfree) +
1601 	    fs->fs_cstotal.cs_nffree + FFS_DBTOFSB(fs, fs->fs_pendingblocks);
1602 	sbp->f_bresvd = ((u_int64_t) fs->fs_dsize * (u_int64_t)
1603 	    fs->fs_minfree) / (u_int64_t) 100;
1604 	if (sbp->f_bfree > sbp->f_bresvd)
1605 		sbp->f_bavail = sbp->f_bfree - sbp->f_bresvd;
1606 	else
1607 		sbp->f_bavail = 0;
1608 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO;
1609 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
1610 	sbp->f_favail = sbp->f_ffree;
1611 	sbp->f_fresvd = 0;
1612 	mutex_exit(&ump->um_lock);
1613 	copy_statvfs_info(sbp, mp);
1614 
1615 	return (0);
1616 }
1617 
1618 /*
1619  * Go through the disk queues to initiate sandbagged IO;
1620  * go through the inodes to write those that have been modified;
1621  * initiate the writing of the super block if it has been modified.
1622  *
1623  * Note: we are always called with the filesystem marked `MPBUSY'.
1624  */
1625 int
1626 ffs_sync(struct mount *mp, int waitfor, kauth_cred_t cred)
1627 {
1628 	struct vnode *vp, *mvp, *nvp;
1629 	struct inode *ip;
1630 	struct ufsmount *ump = VFSTOUFS(mp);
1631 	struct fs *fs;
1632 	int error, allerror = 0;
1633 	bool is_suspending;
1634 
1635 	fs = ump->um_fs;
1636 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
1637 		printf("fs = %s\n", fs->fs_fsmnt);
1638 		panic("update: rofs mod");
1639 	}
1640 
1641 	/* Allocate a marker vnode. */
1642 	mvp = vnalloc(mp);
1643 
1644 	fstrans_start(mp, FSTRANS_SHARED);
1645 	is_suspending = (fstrans_getstate(mp) == FSTRANS_SUSPENDING);
1646 	/*
1647 	 * Write back each (modified) inode.
1648 	 */
1649 	mutex_enter(&mntvnode_lock);
1650 loop:
1651 	/*
1652 	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
1653 	 * and vclean() can be called indirectly
1654 	 */
1655 	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
1656 		nvp = TAILQ_NEXT(vp, v_mntvnodes);
1657 		/*
1658 		 * If the vnode that we are about to sync is no longer
1659 		 * associated with this mount point, start over.
1660 		 */
1661 		if (vp->v_mount != mp)
1662 			goto loop;
1663 		/*
1664 		 * Don't interfere with concurrent scans of this FS.
1665 		 */
1666 		if (vismarker(vp))
1667 			continue;
1668 		mutex_enter(vp->v_interlock);
1669 		ip = VTOI(vp);
1670 
1671 		/*
1672 		 * Skip the vnode/inode if inaccessible.
1673 		 */
1674 		if (ip == NULL || (vp->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0 ||
1675 		    vp->v_type == VNON) {
1676 			mutex_exit(vp->v_interlock);
1677 			continue;
1678 		}
1679 
1680 		/*
1681 		 * We deliberately update inode times here.  This will
1682 		 * prevent a massive queue of updates accumulating, only
1683 		 * to be handled by a call to unmount.
1684 		 *
1685 		 * XXX It would be better to have the syncer trickle these
1686 		 * out.  Adjustment needed to allow registering vnodes for
1687 		 * sync when the vnode is clean, but the inode dirty.  Or
1688 		 * have ufs itself trickle out inode updates.
1689 		 *
1690 		 * If doing a lazy sync, we don't care about metadata or
1691 		 * data updates, because they are handled by each vnode's
1692 		 * synclist entry.  In this case we are only interested in
1693 		 * writing back modified inodes.
1694 		 */
1695 		if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE |
1696 		    IN_MODIFY | IN_MODIFIED | IN_ACCESSED)) == 0 &&
1697 		    (waitfor == MNT_LAZY || (LIST_EMPTY(&vp->v_dirtyblkhd) &&
1698 		    UVM_OBJ_IS_CLEAN(&vp->v_uobj)))) {
1699 			mutex_exit(vp->v_interlock);
1700 			continue;
1701 		}
1702 		if (vp->v_type == VBLK && is_suspending) {
1703 			mutex_exit(vp->v_interlock);
1704 			continue;
1705 		}
1706 		vmark(mvp, vp);
1707 		mutex_exit(&mntvnode_lock);
1708 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT);
1709 		if (error) {
1710 			mutex_enter(&mntvnode_lock);
1711 			nvp = vunmark(mvp);
1712 			if (error == ENOENT) {
1713 				goto loop;
1714 			}
1715 			continue;
1716 		}
1717 		if (waitfor == MNT_LAZY) {
1718 			error = UFS_WAPBL_BEGIN(vp->v_mount);
1719 			if (!error) {
1720 				error = ffs_update(vp, NULL, NULL,
1721 				    UPDATE_CLOSE);
1722 				UFS_WAPBL_END(vp->v_mount);
1723 			}
1724 		} else {
1725 			error = VOP_FSYNC(vp, cred, FSYNC_NOLOG |
1726 			    (waitfor == MNT_WAIT ? FSYNC_WAIT : 0), 0, 0);
1727 		}
1728 		if (error)
1729 			allerror = error;
1730 		vput(vp);
1731 		mutex_enter(&mntvnode_lock);
1732 		nvp = vunmark(mvp);
1733 	}
1734 	mutex_exit(&mntvnode_lock);
1735 	/*
1736 	 * Force stale file system control information to be flushed.
1737 	 */
1738 	if (waitfor != MNT_LAZY && (ump->um_devvp->v_numoutput > 0 ||
1739 	    !LIST_EMPTY(&ump->um_devvp->v_dirtyblkhd))) {
1740 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
1741 		if ((error = VOP_FSYNC(ump->um_devvp, cred,
1742 		    (waitfor == MNT_WAIT ? FSYNC_WAIT : 0) | FSYNC_NOLOG,
1743 		    0, 0)) != 0)
1744 			allerror = error;
1745 		VOP_UNLOCK(ump->um_devvp);
1746 		if (allerror == 0 && waitfor == MNT_WAIT && !mp->mnt_wapbl) {
1747 			mutex_enter(&mntvnode_lock);
1748 			goto loop;
1749 		}
1750 	}
1751 #if defined(QUOTA) || defined(QUOTA2)
1752 	qsync(mp);
1753 #endif
1754 	/*
1755 	 * Write back modified superblock.
1756 	 */
1757 	if (fs->fs_fmod != 0) {
1758 		fs->fs_fmod = 0;
1759 		fs->fs_time = time_second;
1760 		error = UFS_WAPBL_BEGIN(mp);
1761 		if (error)
1762 			allerror = error;
1763 		else {
1764 			if ((error = ffs_cgupdate(ump, waitfor)))
1765 				allerror = error;
1766 			UFS_WAPBL_END(mp);
1767 		}
1768 	}
1769 
1770 #ifdef WAPBL
1771 	if (mp->mnt_wapbl) {
1772 		error = wapbl_flush(mp->mnt_wapbl, 0);
1773 		if (error)
1774 			allerror = error;
1775 	}
1776 #endif
1777 
1778 	fstrans_done(mp);
1779 	vnfree(mvp);
1780 	return (allerror);
1781 }
1782 
1783 /*
1784  * Look up a FFS dinode number to find its incore vnode, otherwise read it
1785  * in from disk.  If it is in core, wait for the lock bit to clear, then
1786  * return the inode locked.  Detection and handling of mount points must be
1787  * done by the calling routine.
1788  */
1789 int
1790 ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
1791 {
1792 	struct fs *fs;
1793 	struct inode *ip;
1794 	struct ufsmount *ump;
1795 	struct buf *bp;
1796 	struct vnode *vp;
1797 	dev_t dev;
1798 	int error;
1799 
1800 	ump = VFSTOUFS(mp);
1801 	dev = ump->um_dev;
1802 
1803  retry:
1804 	if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL)
1805 		return (0);
1806 
1807 	/* Allocate a new vnode/inode. */
1808 	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, NULL, &vp);
1809 	if (error) {
1810 		*vpp = NULL;
1811 		return (error);
1812 	}
1813 	ip = pool_cache_get(ffs_inode_cache, PR_WAITOK);
1814 
1815 	/*
1816 	 * If someone beat us to it, put back the freshly allocated
1817 	 * vnode/inode pair and retry.
1818 	 */
1819 	mutex_enter(&ufs_hashlock);
1820 	if (ufs_ihashget(dev, ino, 0) != NULL) {
1821 		mutex_exit(&ufs_hashlock);
1822 		ungetnewvnode(vp);
1823 		pool_cache_put(ffs_inode_cache, ip);
1824 		goto retry;
1825 	}
1826 
1827 	vp->v_vflag |= VV_LOCKSWORK;
1828 
1829 	/*
1830 	 * XXX MFS ends up here, too, to allocate an inode.  Should we
1831 	 * XXX create another pool for MFS inodes?
1832 	 */
1833 
1834 	memset(ip, 0, sizeof(struct inode));
1835 	vp->v_data = ip;
1836 	ip->i_vnode = vp;
1837 	ip->i_ump = ump;
1838 	ip->i_fs = fs = ump->um_fs;
1839 	ip->i_dev = dev;
1840 	ip->i_number = ino;
1841 #if defined(QUOTA) || defined(QUOTA2)
1842 	ufsquota_init(ip);
1843 #endif
1844 
1845 	/*
1846 	 * Initialize genfs node, we might proceed to destroy it in
1847 	 * error branches.
1848 	 */
1849 	genfs_node_init(vp, &ffs_genfsops);
1850 
1851 	/*
1852 	 * Put it onto its hash chain and lock it so that other requests for
1853 	 * this inode will block if they arrive while we are sleeping waiting
1854 	 * for old data structures to be purged or for the contents of the
1855 	 * disk portion of this inode to be read.
1856 	 */
1857 
1858 	ufs_ihashins(ip);
1859 	mutex_exit(&ufs_hashlock);
1860 
1861 	/* Read in the disk contents for the inode, copy into the inode. */
1862 	error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)),
1863 		      (int)fs->fs_bsize, NOCRED, 0, &bp);
1864 	if (error) {
1865 
1866 		/*
1867 		 * The inode does not contain anything useful, so it would
1868 		 * be misleading to leave it on its hash chain. With mode
1869 		 * still zero, it will be unlinked and returned to the free
1870 		 * list by vput().
1871 		 */
1872 
1873 		vput(vp);
1874 		*vpp = NULL;
1875 		return (error);
1876 	}
1877 	if (ip->i_ump->um_fstype == UFS1)
1878 		ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache,
1879 		    PR_WAITOK);
1880 	else
1881 		ip->i_din.ffs2_din = pool_cache_get(ffs_dinode2_cache,
1882 		    PR_WAITOK);
1883 	ffs_load_inode(bp, ip, fs, ino);
1884 	brelse(bp, 0);
1885 
1886 	/*
1887 	 * Initialize the vnode from the inode, check for aliases.
1888 	 * Note that the underlying vnode may have changed.
1889 	 */
1890 
1891 	ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1892 
1893 	/*
1894 	 * Finish inode initialization now that aliasing has been resolved.
1895 	 */
1896 
1897 	ip->i_devvp = ump->um_devvp;
1898 	vref(ip->i_devvp);
1899 
1900 	/*
1901 	 * Ensure that uid and gid are correct. This is a temporary
1902 	 * fix until fsck has been changed to do the update.
1903 	 */
1904 
1905 	if (fs->fs_old_inodefmt < FS_44INODEFMT) {		/* XXX */
1906 		ip->i_uid = ip->i_ffs1_ouid;			/* XXX */
1907 		ip->i_gid = ip->i_ffs1_ogid;			/* XXX */
1908 	}							/* XXX */
1909 	uvm_vnp_setsize(vp, ip->i_size);
1910 	*vpp = vp;
1911 	return (0);
1912 }
1913 
1914 /*
1915  * File handle to vnode
1916  *
1917  * Have to be really careful about stale file handles:
1918  * - check that the inode number is valid
1919  * - call ffs_vget() to get the locked inode
1920  * - check for an unallocated inode (i_mode == 0)
1921  * - check that the given client host has export rights and return
1922  *   those rights via. exflagsp and credanonp
1923  */
1924 int
1925 ffs_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp)
1926 {
1927 	struct ufid ufh;
1928 	struct fs *fs;
1929 
1930 	if (fhp->fid_len != sizeof(struct ufid))
1931 		return EINVAL;
1932 
1933 	memcpy(&ufh, fhp, sizeof(ufh));
1934 	fs = VFSTOUFS(mp)->um_fs;
1935 	if (ufh.ufid_ino < UFS_ROOTINO ||
1936 	    ufh.ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1937 		return (ESTALE);
1938 	return (ufs_fhtovp(mp, &ufh, vpp));
1939 }
1940 
1941 /*
1942  * Vnode pointer to File handle
1943  */
1944 /* ARGSUSED */
1945 int
1946 ffs_vptofh(struct vnode *vp, struct fid *fhp, size_t *fh_size)
1947 {
1948 	struct inode *ip;
1949 	struct ufid ufh;
1950 
1951 	if (*fh_size < sizeof(struct ufid)) {
1952 		*fh_size = sizeof(struct ufid);
1953 		return E2BIG;
1954 	}
1955 	ip = VTOI(vp);
1956 	*fh_size = sizeof(struct ufid);
1957 	memset(&ufh, 0, sizeof(ufh));
1958 	ufh.ufid_len = sizeof(struct ufid);
1959 	ufh.ufid_ino = ip->i_number;
1960 	ufh.ufid_gen = ip->i_gen;
1961 	memcpy(fhp, &ufh, sizeof(ufh));
1962 	return (0);
1963 }
1964 
1965 void
1966 ffs_init(void)
1967 {
1968 	if (ffs_initcount++ > 0)
1969 		return;
1970 
1971 	ffs_inode_cache = pool_cache_init(sizeof(struct inode), 0, 0, 0,
1972 	    "ffsino", NULL, IPL_NONE, NULL, NULL, NULL);
1973 	ffs_dinode1_cache = pool_cache_init(sizeof(struct ufs1_dinode), 0, 0, 0,
1974 	    "ffsdino1", NULL, IPL_NONE, NULL, NULL, NULL);
1975 	ffs_dinode2_cache = pool_cache_init(sizeof(struct ufs2_dinode), 0, 0, 0,
1976 	    "ffsdino2", NULL, IPL_NONE, NULL, NULL, NULL);
1977 	ufs_init();
1978 }
1979 
/*
 * Reinitialisation hook: FFS has nothing of its own to do here and
 * simply forwards to ufs_reinit().
 */
void
ffs_reinit(void)
{
	ufs_reinit();
}
1986 
1987 void
1988 ffs_done(void)
1989 {
1990 	if (--ffs_initcount > 0)
1991 		return;
1992 
1993 	ufs_done();
1994 	pool_cache_destroy(ffs_dinode2_cache);
1995 	pool_cache_destroy(ffs_dinode1_cache);
1996 	pool_cache_destroy(ffs_inode_cache);
1997 }
1998 
1999 /*
2000  * Write a superblock and associated information back to disk.
2001  */
2002 int
2003 ffs_sbupdate(struct ufsmount *mp, int waitfor)
2004 {
2005 	struct fs *fs = mp->um_fs;
2006 	struct buf *bp;
2007 	int error = 0;
2008 	u_int32_t saveflag;
2009 
2010 	error = ffs_getblk(mp->um_devvp,
2011 	    fs->fs_sblockloc / DEV_BSIZE, FFS_NOBLK,
2012 	    fs->fs_sbsize, false, &bp);
2013 	if (error)
2014 		return error;
2015 	saveflag = fs->fs_flags & FS_INTERNAL;
2016 	fs->fs_flags &= ~FS_INTERNAL;
2017 
2018 	memcpy(bp->b_data, fs, fs->fs_sbsize);
2019 
2020 	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
2021 #ifdef FFS_EI
2022 	if (mp->um_flags & UFS_NEEDSWAP)
2023 		ffs_sb_swap((struct fs *)bp->b_data, (struct fs *)bp->b_data);
2024 #endif
2025 	fs->fs_flags |= saveflag;
2026 
2027 	if (waitfor == MNT_WAIT)
2028 		error = bwrite(bp);
2029 	else
2030 		bawrite(bp);
2031 	return (error);
2032 }
2033 
2034 int
2035 ffs_cgupdate(struct ufsmount *mp, int waitfor)
2036 {
2037 	struct fs *fs = mp->um_fs;
2038 	struct buf *bp;
2039 	int blks;
2040 	void *space;
2041 	int i, size, error = 0, allerror = 0;
2042 
2043 	allerror = ffs_sbupdate(mp, waitfor);
2044 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
2045 	space = fs->fs_csp;
2046 	for (i = 0; i < blks; i += fs->fs_frag) {
2047 		size = fs->fs_bsize;
2048 		if (i + fs->fs_frag > blks)
2049 			size = (blks - i) * fs->fs_fsize;
2050 		error = ffs_getblk(mp->um_devvp, FFS_FSBTODB(fs, fs->fs_csaddr + i),
2051 		    FFS_NOBLK, size, false, &bp);
2052 		if (error)
2053 			break;
2054 #ifdef FFS_EI
2055 		if (mp->um_flags & UFS_NEEDSWAP)
2056 			ffs_csum_swap((struct csum*)space,
2057 			    (struct csum*)bp->b_data, size);
2058 		else
2059 #endif
2060 			memcpy(bp->b_data, space, (u_int)size);
2061 		space = (char *)space + size;
2062 		if (waitfor == MNT_WAIT)
2063 			error = bwrite(bp);
2064 		else
2065 			bawrite(bp);
2066 	}
2067 	if (!allerror && error)
2068 		allerror = error;
2069 	return (allerror);
2070 }
2071 
/*
 * Extended attribute control.  In UFS_EXTATTR kernels a UFS1 mount is
 * handled by ufs_extattrctl(); every other case goes to
 * vfs_stdextattrctl().
 */
int
ffs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
    int attrnamespace, const char *attrname)
{
#ifdef UFS_EXTATTR
	/*
	 * File-backed extended attributes are only supported on UFS1.
	 * UFS2 has native extended attributes.
	 */
	const struct ufsmount *ump = VFSTOUFS(mp);

	if (ump->um_fstype == UFS1) {
		return (ufs_extattrctl(mp, cmd, vp, attrnamespace,
		    attrname));
	}
#endif
	return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace, attrname));
}
2086 
2087 int
2088 ffs_suspendctl(struct mount *mp, int cmd)
2089 {
2090 	int error;
2091 	struct lwp *l = curlwp;
2092 
2093 	switch (cmd) {
2094 	case SUSPEND_SUSPEND:
2095 		if ((error = fstrans_setstate(mp, FSTRANS_SUSPENDING)) != 0)
2096 			return error;
2097 		error = ffs_sync(mp, MNT_WAIT, l->l_proc->p_cred);
2098 		if (error == 0)
2099 			error = fstrans_setstate(mp, FSTRANS_SUSPENDED);
2100 #ifdef WAPBL
2101 		if (error == 0 && mp->mnt_wapbl)
2102 			error = wapbl_flush(mp->mnt_wapbl, 1);
2103 #endif
2104 		if (error != 0) {
2105 			(void) fstrans_setstate(mp, FSTRANS_NORMAL);
2106 			return error;
2107 		}
2108 		return 0;
2109 
2110 	case SUSPEND_RESUME:
2111 		return fstrans_setstate(mp, FSTRANS_NORMAL);
2112 
2113 	default:
2114 		return EINVAL;
2115 	}
2116 }
2117 
2118 /*
2119  * Synch vnode for a mounted file system.
2120  */
2121 static int
2122 ffs_vfs_fsync(vnode_t *vp, int flags)
2123 {
2124 	int error, i, pflags;
2125 #ifdef WAPBL
2126 	struct mount *mp;
2127 #endif
2128 
2129 	KASSERT(vp->v_type == VBLK);
2130 	KASSERT(spec_node_getmountedfs(vp) != NULL);
2131 
2132 	/*
2133 	 * Flush all dirty data associated with the vnode.
2134 	 */
2135 	pflags = PGO_ALLPAGES | PGO_CLEANIT;
2136 	if ((flags & FSYNC_WAIT) != 0)
2137 		pflags |= PGO_SYNCIO;
2138 	mutex_enter(vp->v_interlock);
2139 	error = VOP_PUTPAGES(vp, 0, 0, pflags);
2140 	if (error)
2141 		return error;
2142 
2143 #ifdef WAPBL
2144 	mp = spec_node_getmountedfs(vp);
2145 	if (mp && mp->mnt_wapbl) {
2146 		/*
2147 		 * Don't bother writing out metadata if the syncer is
2148 		 * making the request.  We will let the sync vnode
2149 		 * write it out in a single burst through a call to
2150 		 * VFS_SYNC().
2151 		 */
2152 		if ((flags & (FSYNC_DATAONLY | FSYNC_LAZY | FSYNC_NOLOG)) != 0)
2153 			return 0;
2154 
2155 		/*
2156 		 * Don't flush the log if the vnode being flushed
2157 		 * contains no dirty buffers that could be in the log.
2158 		 */
2159 		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
2160 			error = wapbl_flush(mp->mnt_wapbl, 0);
2161 			if (error)
2162 				return error;
2163 		}
2164 
2165 		if ((flags & FSYNC_WAIT) != 0) {
2166 			mutex_enter(vp->v_interlock);
2167 			while (vp->v_numoutput)
2168 				cv_wait(&vp->v_cv, vp->v_interlock);
2169 			mutex_exit(vp->v_interlock);
2170 		}
2171 
2172 		return 0;
2173 	}
2174 #endif /* WAPBL */
2175 
2176 	error = vflushbuf(vp, flags);
2177 	if (error == 0 && (flags & FSYNC_CACHE) != 0) {
2178 		i = 1;
2179 		(void)VOP_IOCTL(vp, DIOCCACHESYNC, &i, FWRITE,
2180 		    kauth_cred_get());
2181 	}
2182 
2183 	return error;
2184 }
2185