1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 
 *
 *	@(#)ffs_vfsops.c	8.8 (Berkeley) 4/18/94
 * $FreeBSD$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/bio.h>
#include <sys/buf2.h>
#include <sys/conf.h>
#include <sys/endian.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/mutex2.h>
#include <sys/nlookup.h>

#include <vfs/ext2fs/fs.h>
#include <vfs/ext2fs/ext2_mount.h>
#include <vfs/ext2fs/inode.h>

#include <vfs/ext2fs/ext2fs.h>
#include <vfs/ext2fs/ext2_dinode.h>
#include <vfs/ext2fs/ext2_extern.h>
#include <vfs/ext2fs/ext2_extents.h>

SDT_PROVIDER_DECLARE(ext2fs);
/*
 * ext2fs trace probe:
 * arg0: verbosity. Higher numbers give more verbose messages
 * arg1: Textual message
 */
SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*");
SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int");
SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*");

static int	ext2_flushfiles(struct mount *mp, int flags);
static int	ext2_mountfs(struct vnode *, struct mount *);
static int	ext2_reload(struct mount *mp);
static int	ext2_sbupdate(struct ext2mount *, int);
static int	ext2_cgupdate(struct ext2mount *, int);
static int	ext2_init(struct vfsconf *);
static int	ext2_uninit(struct vfsconf *);
static vfs_unmount_t		ext2_unmount;
static vfs_root_t		ext2_root;
static vfs_statfs_t		ext2_statfs;
static vfs_statvfs_t		ext2_statvfs;
static vfs_sync_t		ext2_sync;
static vfs_vget_t		ext2_vget;
static vfs_fhtovp_t		ext2_fhtovp;
static vfs_vptofh_t		ext2_vptofh;
static vfs_checkexp_t		ext2_check_export;
static vfs_mount_t		ext2_mount;

MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part");
static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure");

/* Operations vector registered for the "ext2fs" filesystem type. */
static struct vfsops ext2fs_vfsops = {
	.vfs_flags =		0,
	.vfs_mount =		ext2_mount,
	.vfs_unmount =		ext2_unmount,
	.vfs_root =		ext2_root,	/* root inode via vget */
	.vfs_statfs =		ext2_statfs,
	.vfs_statvfs =		ext2_statvfs,
	.vfs_sync =		ext2_sync,
	.vfs_vget =		ext2_vget,
	.vfs_fhtovp =		ext2_fhtovp,
	.vfs_vptofh =		ext2_vptofh,
	.vfs_checkexp =		ext2_check_export,
	.vfs_init =		ext2_init,
	.vfs_uninit =		ext2_uninit
};

VFS_SET(ext2fs_vfsops, ext2fs, VFCF_MPSAFE);
MODULE_VERSION(ext2fs, 1);

static int	ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev,
		    int ronly);
static int	ext2_compute_sb_data(struct vnode * devvp,
		    struct ext2fs * es, struct m_ext2fs * fs);

static int ext2fs_inode_hash_lock;

/*
 * VFS Operations.
 *
 * mount system call
 *
 * Copies a struct ext2_args in from `data' and then handles one of:
 *  - MNT_UPDATE: read/write -> read-only downgrade, MNT_RELOAD,
 *    read-only -> read/write upgrade (MNTK_WANTRDWR), and, when no
 *    device name was supplied (args.fspec == NULL), export requests;
 *  - a fresh mount: the device named by args.fspec is looked up,
 *    checked to be a disk, access-checked for non-root callers and
 *    handed to ext2_mountfs().
 *
 * On success the mount point and device names are recorded and
 * mp->mnt_stat is refreshed through ext2_statfs().
 *
 * Returns 0 on success or an errno value.
 */
static int
ext2_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
{
	struct ext2_args args;
	struct vnode *devvp;
	struct ext2mount *ump = NULL;
	struct m_ext2fs *fs;
	struct nlookupdata nd;
	mode_t accmode;
	int error, flags;
	size_t size;

	if ((error = copyin(data, (caddr_t)&args, sizeof (struct ext2_args))) != 0)
		return (error);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		ump = VFSTOEXT2(mp);
		fs = ump->um_e2fs;
		devvp = ump->um_devvp;
		error = 0;
		/* Downgrade: currently read/write, read-only requested. */
		if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error)
				return (error);
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (vfs_busy(mp, LK_NOWAIT))
				return (EBUSY);
			error = ext2_flushfiles(mp, flags);
			vfs_unbusy(mp);
			/*
			 * Only mark the filesystem clean again if it was
			 * clean when mounted and the group descriptors
			 * were written out successfully.
			 */
			if (error == 0 && fs->e2fs_wasvalid &&
			    ext2_cgupdate(ump, MNT_WAIT) == 0) {
				fs->e2fs->e2fs_state =
				    htole16((le16toh(fs->e2fs->e2fs_state) |
				    E2FS_ISCLEAN));
				ext2_sbupdate(ump, MNT_WAIT);
			}
			/*
			 * NOTE(review): the read-only switch below is done
			 * even when ext2_flushfiles() failed; the error is
			 * only returned afterwards.
			 */
			fs->e2fs_ronly = 1;
			/*
			 * Open read-only, then close with the read/write
			 * mode -- presumably this drops the open taken by
			 * the original read/write mount.
			 */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			VOP_OPEN(devvp, FREAD, FSCRED, NULL);
			VOP_CLOSE(devvp, FREAD | FWRITE, NULL);
			vn_unlock(devvp);
		}
		if (!error && (mp->mnt_flag & MNT_RELOAD))
			error = ext2_reload(mp);
		if (error)
			return (error);
		devvp = ump->um_devvp;
		/* Upgrade: currently read-only, read/write wanted. */
		if (fs->e2fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
			if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0))
				return (EPERM);

			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			if (cred->cr_uid != 0) {
				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
				error = VOP_EACCESS(devvp, VREAD | VWRITE, cred);
				if (error) {
					vn_unlock(devvp);
					return (error);
				}
				vn_unlock(devvp);
			}

			/* An unclean filesystem needs MNT_FORCE to go r/w. */
			if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 ||
			    (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) {
				if (mp->mnt_flag & MNT_FORCE) {
					printf(
"WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt);
				} else {
					printf(
"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
					    fs->e2fs_fsmnt);
					return (EPERM);
				}
			}
			/* Clear the clean flag while mounted read/write. */
			fs->e2fs->e2fs_state =
			    htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN);
			(void)ext2_cgupdate(ump, MNT_WAIT);
			fs->e2fs_ronly = 0;
			mp->mnt_flag &= ~MNT_RDONLY;

			/* Mirror image of the open/close swap done above. */
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, NULL);
			VOP_CLOSE(devvp, FREAD, NULL);
			vn_unlock(devvp);
		}
		if (args.fspec == NULL) {
			/*
			 * Process export requests.
			 */
			return (vfs_export(mp, &ump->um_export, &args.export));
		}
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	devvp = NULL;
	error = nlookup_init(&nd, args.fspec, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp);
	nlookup_done(&nd);
	if (error)
		return (error);

	if (!vn_isdisk(devvp, &error)) {
		vrele(devvp);
		return (error);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 *
	 * XXXRW: VOP_ACCESS() enough?
	 */
	if (cred->cr_uid != 0) {
		accmode = VREAD;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			accmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		if ((error = VOP_EACCESS(devvp, accmode, cred)) != 0) {
			vput(devvp);
			return (error);
		}
		vn_unlock(devvp);
	}

	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		error = ext2_mountfs(devvp, mp);
	} else {
		/* An update may not switch to a different device. */
		if (devvp != ump->um_devvp)
			error = EINVAL;	/* needs translation */
		else
			vrele(devvp);
	}
	if (error) {
		vrele(devvp);
		return (error);
	}
	ump = VFSTOEXT2(mp);
	fs = ump->um_e2fs;

	/*
	 * Record the mount point and the device name, zero-filling the
	 * remainder of each buffer after the copied string.
	 *
	 * NOTE(review): the copyinstr() return values are not checked
	 * here; `size' is used as reported by copyinstr().
	 */
	copyinstr(path, fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt) - 1, &size);
	bzero(fs->e2fs_fsmnt + size, sizeof(fs->e2fs_fsmnt) - size);
	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	ext2_statfs(mp, &mp->mnt_stat, cred);
	return (0);
}

/*
 * Check that the on-disk superblock `es' can be mounted.
 *
 * Rejects a wrong magic number, any incompat feature bit outside
 * EXT2F_INCOMPAT_SUPP and, unless `ronly' is set, any ro_compat feature
 * bit outside EXT2F_ROCOMPAT_SUPP.  The offending feature names are
 * printed; `dev' is only used for those messages.
 *
 * Returns 0 if the superblock is acceptable, 1 otherwise.
 */
static int
ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly)
{
	uint32_t i, mask;

	if (le16toh(es->e2fs_magic) != E2FS_MAGIC) {
		printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n",
		    devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC);
		return (1);
	}
	/* Feature flags are only examined past revision 0. */
	if (le32toh(es->e2fs_rev) > E2FS_REV0) {
		mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP);
		if (mask) {
			printf("WARNING: mount of %s denied due to "
			    "unsupported optional features:\n", devtoname(dev));
			for (i = 0;
			    i < sizeof(incompat)/sizeof(struct ext2_feature);
			    i++)
				if (mask & incompat[i].mask)
					printf("%s ", incompat[i].name);
			printf("\n");
			return (1);
		}
		mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP;
		if (!ronly && mask) {
			printf("WARNING: R/W mount of %s denied due to "
			    "unsupported optional features:\n", devtoname(dev));
			for (i = 0;
			    i < sizeof(ro_compat)/sizeof(struct ext2_feature);
			    i++)
				if (mask & ro_compat[i].mask)
					printf("%s ", ro_compat[i].name);
			printf("\n");
			return (1);
		}
	}
	return (0);
}

/*
 * Return the filesystem block number holding group descriptor block
 * `number'.
 *
 * Without the META_BG feature, or for descriptor blocks below
 * e3fs_first_meta_bg, the descriptor blocks directly follow the
 * superblock.  Otherwise the location is derived from the first group
 * covered by that descriptor block, skipping one block when that group
 * carries a superblock copy.
 */
static e4fs_daddr_t
ext2_cg_location(struct m_ext2fs *fs, int number)
{
	int cg, descpb, logical_sb, has_super = 0;

	/*
	 * Adjust logical superblock block number.
	 * Godmar thinks: if the blocksize is greater than 1024, then
	 * the superblock is logically part of block zero.
	 */
	logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1;

	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
	    number < le32toh(fs->e2fs->e3fs_first_meta_bg))
		return (logical_sb + number + 1);

	/* Descriptors per block depends on the descriptor size in use. */
	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT))
		descpb = fs->e2fs_bsize / sizeof(struct ext2_gd);
	else
		descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;

	/* First group described by descriptor block `number'. */
	cg = descpb * number;

	if (ext2_cg_has_sb(fs, cg))
		has_super = 1;

	return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) +
	    le32toh(fs->e2fs->e2fs_first_dblock));
}

/*
 * Sanity-check the in-core group descriptors in fs->e2fs_gd.
 *
 * For every group the block bitmap, inode bitmap and inode table
 * locations must be non-zero, lie beyond the group descriptor area and
 * fall within the allowed block range (the group itself, or the whole
 * filesystem when FLEX_BG is enabled or for the last group).
 *
 * Fires the ext2_cg_validate_error probe and returns EINVAL on the
 * first violation; returns 0 if all groups pass.
 */
static int
ext2_cg_validate(struct m_ext2fs *fs)
{
	uint64_t b_bitmap;
	uint64_t i_bitmap;
	uint64_t i_tables;
	uint64_t first_block, last_block, last_cg_block;
	struct ext2_gd *gd;
	unsigned int i, cg_count;

	first_block = le32toh(fs->e2fs->e2fs_first_dblock);
	last_cg_block = ext2_cg_number_gdb(fs, 0);
	cg_count = fs->e2fs_gcount;

	for (i = 0; i < fs->e2fs_gcount; i++) {
		gd = &fs->e2fs_gd[i];

		if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
		    i == fs->e2fs_gcount - 1) {
			last_block = fs->e2fs_bcount - 1;
		} else {
			last_block = first_block +
			    (EXT2_BLOCKS_PER_GROUP(fs) - 1);
		}

		/*
		 * Remember the first group without EXT2_BG_INODE_ZEROED.
		 * NOTE(review): cg_count is not read again in this
		 * function.
		 */
		if ((cg_count == fs->e2fs_gcount) &&
		    !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED))
			cg_count = i;

		b_bitmap = e2fs_gd_get_b_bitmap(gd);
		if (b_bitmap == 0) {
			SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
			    "block bitmap is zero", i);
			return (EINVAL);
		}
		if (b_bitmap <= last_cg_block) {
			SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
			    "block bitmap overlaps gds", i);
			return (EINVAL);
		}
		if (b_bitmap < first_block || b_bitmap > last_block) {
			SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
			    "block bitmap not in group", i);
			return (EINVAL);
		}

		i_bitmap = e2fs_gd_get_i_bitmap(gd);
		if (i_bitmap == 0) {
			SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
			    "inode bitmap is zero", i);
			return (EINVAL);
		}
		if (i_bitmap <= last_cg_block) {
			SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
			    "inode bitmap overlaps gds", i);
			return (EINVAL);
		}
		if (i_bitmap < first_block || i_bitmap > last_block) {
			SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
			    "inode bitmap not in group blk", i);
			return (EINVAL);
		}

		i_tables = e2fs_gd_get_i_tables(gd);
		if (i_tables == 0) {
			SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
			    "inode table is zero", i);
			return (EINVAL);
		}
		if (i_tables <= last_cg_block) {
			SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
			    "inode tables overlaps gds", i);
			return (EINVAL);
		}
		/* The whole inode table (e2fs_itpg blocks) must fit. */
		if (i_tables < first_block ||
		    i_tables + fs->e2fs_itpg - 1 > last_block) {
			SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error,
			    "inode tables not in group blk", i);
			return (EINVAL);
		}

		/* Without FLEX_BG the window advances group by group. */
		if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG))
			first_block += EXT2_BLOCKS_PER_GROUP(fs);
	}

	return (0);
}

/*
 * This computes the fields of the m_ext2fs structure from the
 * data in the ext2fs structure read in.
 *
 * Along the way the superblock geometry is sanity-checked, the
 * fs->e2fs_gd and fs->e2fs_contigdirs arrays are allocated, the group
 * descriptors are read from devvp and validated, and the maximum file
 * size and directory-hash flags are derived.
 *
 * Returns 0 on success, EINVAL when a sanity check fails (the
 * ext2_compute_sb_data_error probe names the check), or the error
 * returned by bread() or by one of the checksum/group validators.
 */
static int
ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es,
    struct m_ext2fs *fs)
{
	struct buf *bp;
	uint32_t e2fs_descpb, e2fs_gdbcount_alloc;
	int i, j;
	int g_count = 0;
	int error;

	/*
	 * Check if first dblock is valid
	 *
	 * NOTE(review): unlike the other superblock reads in this
	 * function, these two fields are used without le32toh().  The
	 * first data block is checked again against the block size
	 * further down -- confirm the intent of this early test.
	 */
	if (fs->e2fs->e2fs_bcount >= 1024 && fs->e2fs->e2fs_first_dblock) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "first dblock is invalid");
		return (EINVAL);
	}

	/* Check checksum features */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) &&
	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "incorrect checksum features combination");
		return (EINVAL);
	}

	/* Precompute checksum seed for all metadata */
	ext2_sb_csum_set_seed(fs);

	/* Verify sb csum if possible */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		error = ext2_sb_csum_verify(fs);
		if (error) {
			return (error);
		}
	}

	/* Check for block size = 1K|2K|4K */
	if (le32toh(es->e2fs_log_bsize) > 2) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "bad block size");
		return (EINVAL);
	}

	/* Derive the block-size dependent shifts and masks. */
	fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize);
	fs->e2fs_bsize = 1U << fs->e2fs_bshift;
	fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1;
	fs->e2fs_qbmask = fs->e2fs_bsize - 1;

	/* Check for fragment size */
	if (le32toh(es->e2fs_log_fsize) >
	    (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "invalid log cluster size");
		return (EINVAL);
	}

	/* Only fragment size == block size is accepted. */
	fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize);
	if (fs->e2fs_fsize != fs->e2fs_bsize) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "fragment size != block size");
		return (EINVAL);
	}

	fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize;

	/* Check reserved gdt blocks for future filesystem expansion */
	if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "number of reserved GDT blocks too large");
		return (EINVAL);
	}

	if (le32toh(es->e2fs_rev) == E2FS_REV0) {
		/* Revision 0 has a fixed inode size. */
		fs->e2fs_isize = E2FS_REV0_INODE_SIZE;
	} else {
		fs->e2fs_isize = le16toh(es->e2fs_inode_size);

		/*
		 * Check first ino.
		 */
		if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) {
			SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
			    "invalid first ino");
			return (EINVAL);
		}

		/*
		 * Simple sanity check for superblock inode size value.
		 * It must be a power of two between the revision 0 inode
		 * size and the block size.
		 */
		if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE ||
		    EXT2_INODE_SIZE(fs) > fs->e2fs_bsize ||
		    (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) {
			SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
			    "invalid inode size");
			return (EINVAL);
		}
	}

	/* Check group descriptors */
	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) &&
	    le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "unsupported 64bit descriptor size");
		return (EINVAL);
	}

	fs->e2fs_bpg = le32toh(es->e2fs_bpg);
	fs->e2fs_fpg = le32toh(es->e2fs_fpg);
	if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "zero blocks/fragments per group");
		return (EINVAL);
	} else if (fs->e2fs_bpg != fs->e2fs_fpg) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "blocks per group not equal fragments per group");
		return (EINVAL);
	}

	/* A group must be exactly as large as one bitmap block covers. */
	if (fs->e2fs_bpg != fs->e2fs_bsize * 8) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "non-standard group size unsupported");
		return (EINVAL);
	}

	fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs);
	if (fs->e2fs_ipb == 0 ||
	    fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "bad inodes per block size");
		return (EINVAL);
	}

	fs->e2fs_ipg = le32toh(es->e2fs_ipg);
	if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "invalid inodes per group");
		return (EINVAL);
	}

	/* Inode table blocks per group. */
	fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb;

	/* Block counters; the high halves only exist with 64BIT. */
	fs->e2fs_bcount = le32toh(es->e2fs_bcount);
	fs->e2fs_rbcount = le32toh(es->e2fs_rbcount);
	fs->e2fs_fbcount = le32toh(es->e2fs_fbcount);
	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
		fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32;
		fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32;
		fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32;
	}
	if (fs->e2fs_rbcount > fs->e2fs_bcount ||
	    fs->e2fs_fbcount > fs->e2fs_bcount) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "invalid block count");
		return (EINVAL);
	}

	fs->e2fs_ficount = le32toh(es->e2fs_ficount);
	if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "invalid number of free inodes");
		return (EINVAL);
	}

	/* First data block must be 1 for 1K blocks and 0 otherwise. */
	if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) ||
	    le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "first data block out of range");
		return (EINVAL);
	}

	fs->e2fs_gcount = howmany(fs->e2fs_bcount -
	    le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs));
	if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "groups count too large");
		return (EINVAL);
	}

	/* Check for extra isize in big inodes. */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) &&
	    EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) {
		SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error,
		    "no space for extra inode timestamps");
		return (EINVAL);
	}

	/* s_resuid / s_resgid ? */

	/*
	 * e2fs_descpb is the number of on-disk descriptors per block;
	 * e2fs_gdbcount_alloc is the number of blocks needed to hold all
	 * descriptors as in-core struct ext2_gd entries.
	 */
	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
		e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE;
		e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb);
	} else {
		e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE;
		e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount,
		    fs->e2fs_bsize / sizeof(struct ext2_gd));
	}
	fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb);
	fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize,
	    M_EXT2MNT, M_WAITOK | M_ZERO);
	fs->e2fs_contigdirs = malloc(fs->e2fs_gcount *
	    sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO);

	/* Read every group descriptor block into fs->e2fs_gd. */
	for (i = 0; i < fs->e2fs_gdbcount; i++) {
		error = bread(devvp, fsbtodoff(fs, ext2_cg_location(fs, i)),
		    fs->e2fs_bsize, &bp);
		if (error) {
			/*
			 * fs->e2fs_gd and fs->e2fs_contigdirs
			 * will be freed later by the caller,
			 * because this function could be called from
			 * MNT_UPDATE path.
			 */
			brelse(bp);
			return (error);
		}
		if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) {
			/* On-disk block is copied as a whole. */
			memcpy(&fs->e2fs_gd[
			    i * fs->e2fs_bsize / sizeof(struct ext2_gd)],
			    bp->b_data, fs->e2fs_bsize);
		} else {
			/*
			 * Smaller on-disk descriptors are copied one by
			 * one into the in-core entries.
			 */
			for (j = 0; j < e2fs_descpb &&
			    g_count < fs->e2fs_gcount; j++, g_count++)
				memcpy(&fs->e2fs_gd[g_count],
				    bp->b_data + j * E2FS_REV0_GD_SIZE,
				    E2FS_REV0_GD_SIZE);
		}
		brelse(bp);
		bp = NULL;
	}

	/* Validate cgs consistency */
	error = ext2_cg_validate(fs);
	if (error)
		return (error);

	/* Verify cgs csum */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
		error = ext2_gd_csum_verify(fs, devvp->v_rdev);
		if (error)
			return (error);
	}
	/* Initialization for the ext2 Orlov allocator variant. */
	fs->e2fs_total_dir = 0;
	for (i = 0; i < fs->e2fs_gcount; i++)
		fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]);

	/* Maximum file size depends on the LARGEFILE/HUGE_FILE features. */
	if (le32toh(es->e2fs_rev) == E2FS_REV0 ||
	    !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE))
		fs->e2fs_maxfilesize = 0x7fffffff;
	else {
		fs->e2fs_maxfilesize = 0xffffffffffff;
		if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE))
			fs->e2fs_maxfilesize = 0x7fffffffffffffff;
	}
	/*
	 * Directory hash signedness: honour an explicit on-disk flag,
	 * otherwise record the flag matching this platform's char type.
	 */
	if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) {
		fs->e2fs_uhash = 3;
	} else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) {
#ifdef __CHAR_UNSIGNED__
		es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH);
		fs->e2fs_uhash = 3;
#else
		es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH);
#endif
	}
	/* `error' is 0 here unless the verification below sets it. */
	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM))
		error = ext2_sb_csum_verify(fs);

	return (error);
}

/* Argument block handed to the per-vnode scan callbacks in this file. */
struct scaninfo {
	int rescan;
	int allerror;
	int waitfor;
	struct vnode *devvp;
	struct m_ext2fs *fs;
};

static int 737 ext2_reload_scan(struct mount *mp, struct vnode *vp, void *data) 738 { 739 struct scaninfo *info = data; 740 struct inode *ip; 741 struct buf *bp; 742 int error; 743 744 /* 745 * Try to recycle 746 */ 747 if (vrecycle(vp)) 748 return (0); 749 750 /* 751 * Step 1: invalidate all cached file data. 752 */ 753 if (vinvalbuf(vp, 0, 0, 0)) 754 panic("ext2_reload: dirty2"); 755 /* 756 * Step 2: re-read inode data for all active vnodes. 757 */ 758 ip = VTOI(vp); 759 error = bread(info->devvp, 760 fsbtodoff(info->fs, ino_to_fsba(info->fs, ip->i_number)), 761 (int)info->fs->e2fs_bsize, &bp); 762 if (error) { 763 brelse(bp); 764 return (error); 765 } 766 767 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 768 EXT2_INODE_SIZE(info->fs) * ino_to_fsbo(info->fs, ip->i_number)), 769 ip); 770 771 brelse(bp); 772 return (error); 773 } 774 775 /* 776 * Reload all incore data for a filesystem (used after running fsck on 777 * the root filesystem and finding things to fix). The filesystem must 778 * be mounted read-only. 779 * 780 * Things to do to update the mount: 781 * 1) invalidate all cached meta-data. 782 * 2) re-read superblock from disk. 783 * 3) invalidate all cluster summary information. 784 * 4) invalidate all inactive vnodes. 785 * 5) invalidate all cached file data. 786 * 6) re-read inode data for all active vnodes. 787 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 788 */ 789 static int 790 ext2_reload(struct mount *mp) 791 { 792 struct vnode *devvp; 793 struct buf *bp; 794 struct ext2fs *es; 795 struct m_ext2fs *fs; 796 struct csum *sump; 797 struct scaninfo scaninfo; 798 int error, i; 799 int32_t *lp; 800 801 if ((mp->mnt_flag & MNT_RDONLY) == 0) 802 return (EINVAL); 803 /* 804 * Step 1: invalidate all cached meta-data. 
805 */ 806 devvp = VFSTOEXT2(mp)->um_devvp; 807 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 808 if (vinvalbuf(devvp, 0, 0, 0) != 0) 809 panic("ext2_reload: dirty1"); 810 vn_unlock(devvp); 811 812 /* 813 * Step 2: re-read superblock from disk. 814 * constants have been adjusted for ext2 815 */ 816 if ((error = bread(devvp, SBOFF, SBSIZE, &bp)) != 0) { 817 brelse(bp); 818 return (error); 819 } 820 es = (struct ext2fs *)bp->b_data; 821 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 822 brelse(bp); 823 return (EIO); /* XXX needs translation */ 824 } 825 fs = VFSTOEXT2(mp)->um_e2fs; 826 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 827 828 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 829 brelse(bp); 830 return (error); 831 } 832 #ifdef UNKLAR 833 if (fs->fs_sbsize < SBSIZE) 834 bp->b_flags |= B_INVAL; 835 #endif 836 brelse(bp); 837 838 /* 839 * Step 3: invalidate all cluster summary information. 840 */ 841 if (fs->e2fs_contigsumsize > 0) { 842 lp = fs->e2fs_maxcluster; 843 sump = fs->e2fs_clustersum; 844 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 845 *lp++ = fs->e2fs_contigsumsize; 846 sump->cs_init = 0; 847 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 848 } 849 } 850 851 scaninfo.rescan = 1; 852 scaninfo.devvp = devvp; 853 scaninfo.fs = fs; 854 while (error == 0 && scaninfo.rescan) { 855 scaninfo.rescan = 0; 856 error = vmntvnodescan(mp, VMSC_GETVX, NULL, ext2_reload_scan, 857 &scaninfo); 858 } 859 return (error); 860 } 861 862 /* 863 * Common code for mount and mountroot. 864 */ 865 static int 866 ext2_mountfs(struct vnode *devvp, struct mount *mp) 867 { 868 struct ext2mount *ump; 869 struct buf *bp; 870 struct m_ext2fs *fs; 871 struct ext2fs *es; 872 struct cdev *dev = devvp->v_rdev; 873 struct csum *sump; 874 int error; 875 int ronly; 876 int i; 877 u_long size; 878 int32_t *lp; 879 int32_t e2fs_maxcontig; 880 881 /* 882 * Disallow multiple mounts of the same device. 
883 * Disallow mounting of a device that is currently in use 884 * (except for root, which might share swap device for miniroot). 885 * Flush out any old buffers remaining from a previous use. 886 */ 887 if ((error = vfs_mountedon(devvp)) != 0) 888 return (error); 889 if (vcount(devvp) > 0) 890 return (EBUSY); 891 if ((error = vinvalbuf(devvp, V_SAVE, 0, 0)) != 0) 892 return (error); 893 #ifdef READONLY 894 /* Turn on this to force it to be read-only. */ 895 mp->mnt_flag |= MNT_RDONLY; 896 #endif 897 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 898 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 899 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, NULL); 900 vn_unlock(devvp); 901 if (error) 902 return (error); 903 904 if (devvp->v_rdev->si_iosize_max != 0) 905 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 906 if (mp->mnt_iosize_max > MAXPHYS) 907 mp->mnt_iosize_max = MAXPHYS; 908 909 bp = NULL; 910 ump = NULL; 911 if ((error = bread(devvp, SBOFF, SBSIZE, &bp)) != 0) 912 goto out; 913 es = (struct ext2fs *)bp->b_data; 914 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 915 error = EINVAL; /* XXX needs translation */ 916 goto out; 917 } 918 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 919 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 920 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 921 printf( 922 "WARNING: Filesystem was not properly dismounted\n"); 923 } else { 924 printf( 925 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 926 error = EPERM; 927 goto out; 928 } 929 } 930 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 931 932 /* 933 * I don't know whether this is the right strategy. Note that 934 * we dynamically allocate both an m_ext2fs and an ext2fs 935 * while Linux keeps the super block in a locked buffer. 
936 */ 937 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 938 M_EXT2MNT, M_WAITOK | M_ZERO); 939 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 940 M_EXT2MNT, M_WAITOK); 941 mtx_init(EXT2_MTX(ump), "EXT2FS Lock"); 942 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 943 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 944 goto out; 945 946 /* 947 * Calculate the maximum contiguous blocks and size of cluster summary 948 * array. In FFS this is done by newfs; however, the superblock 949 * in ext2fs doesn't have these variables, so we can calculate 950 * them here. 951 */ 952 e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize); 953 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 954 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 955 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 956 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 957 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 958 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 959 lp = ump->um_e2fs->e2fs_maxcluster; 960 sump = ump->um_e2fs->e2fs_clustersum; 961 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 962 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 963 sump->cs_init = 0; 964 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 965 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 966 } 967 } 968 969 brelse(bp); 970 bp = NULL; 971 fs = ump->um_e2fs; 972 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 973 974 /* 975 * If the fs is not mounted read-only, make sure the super block is 976 * always written back on a sync(). 977 */ 978 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 
1 : 0; 979 if (ronly == 0) { 980 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 981 fs->e2fs->e2fs_state = 982 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 983 } 984 mp->mnt_data = (qaddr_t)ump; 985 mp->mnt_stat.f_fsid.val[0] = devid_from_dev(dev); 986 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 987 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 988 mp->mnt_flag |= MNT_LOCAL; 989 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 990 ump->um_mountp = mp; 991 ump->um_dev = dev; 992 ump->um_devvp = devvp; 993 994 /* 995 * Setting those two parameters allowed us to use 996 * ufs_bmap w/o changse! 997 */ 998 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 999 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 1000 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 1001 dev->si_mountpoint = mp; 1002 1003 vfs_add_vnodeops(mp, &ext2_vnodeops, &mp->mnt_vn_norm_ops); 1004 vfs_add_vnodeops(mp, &ext2_specops, &mp->mnt_vn_spec_ops); 1005 vfs_add_vnodeops(mp, &ext2_fifoops, &mp->mnt_vn_fifo_ops); 1006 1007 if (ronly == 0) 1008 ext2_sbupdate(ump, MNT_WAIT); 1009 return (0); 1010 out: 1011 if (bp) 1012 brelse(bp); 1013 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1014 VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1015 vn_unlock(devvp); 1016 if (ump) { 1017 mtx_uninit(EXT2_MTX(ump)); 1018 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 1019 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 1020 free(ump->um_e2fs->e2fs, M_EXT2MNT); 1021 free(ump->um_e2fs, M_EXT2MNT); 1022 free(ump, M_EXT2MNT); 1023 mp->mnt_data = NULL; 1024 } 1025 return (error); 1026 } 1027 1028 /* 1029 * Unmount system call. 
1030 */ 1031 static int 1032 ext2_unmount(struct mount *mp, int mntflags) 1033 { 1034 struct ext2mount *ump; 1035 struct m_ext2fs *fs; 1036 struct csum *sump; 1037 int error, flags, i, ronly; 1038 1039 flags = 0; 1040 if (mntflags & MNT_FORCE) { 1041 if (mp->mnt_flag & MNT_ROOTFS) 1042 return (EINVAL); 1043 flags |= FORCECLOSE; 1044 } 1045 if ((error = ext2_flushfiles(mp, flags)) != 0) 1046 return (error); 1047 ump = VFSTOEXT2(mp); 1048 fs = ump->um_e2fs; 1049 ronly = fs->e2fs_ronly; 1050 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1051 if (fs->e2fs_wasvalid) 1052 fs->e2fs->e2fs_state = 1053 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1054 ext2_sbupdate(ump, MNT_WAIT); 1055 } 1056 1057 ump->um_devvp->v_rdev->si_mountpoint = NULL; 1058 1059 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1060 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1061 vn_unlock(ump->um_devvp); 1062 1063 vrele(ump->um_devvp); 1064 sump = fs->e2fs_clustersum; 1065 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1066 free(sump->cs_sum, M_EXT2MNT); 1067 free(fs->e2fs_clustersum, M_EXT2MNT); 1068 free(fs->e2fs_maxcluster, M_EXT2MNT); 1069 free(fs->e2fs_gd, M_EXT2MNT); 1070 free(fs->e2fs_contigdirs, M_EXT2MNT); 1071 free(fs->e2fs, M_EXT2MNT); 1072 free(fs, M_EXT2MNT); 1073 free(ump, M_EXT2MNT); 1074 mp->mnt_data = NULL; 1075 mp->mnt_flag &= ~MNT_LOCAL; 1076 return (error); 1077 } 1078 1079 /* 1080 * Flush out all the files in a filesystem. 1081 */ 1082 static int 1083 ext2_flushfiles(struct mount *mp, int flags) 1084 { 1085 int error; 1086 1087 error = vflush(mp, 0, flags); 1088 return (error); 1089 } 1090 1091 /* 1092 * Get filesystem statistics. 
1093 */ 1094 static int 1095 ext2_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 1096 { 1097 struct ext2mount *ump; 1098 struct m_ext2fs *fs; 1099 uint32_t overhead, overhead_per_group, ngdb; 1100 int i, ngroups; 1101 1102 ump = VFSTOEXT2(mp); 1103 fs = ump->um_e2fs; 1104 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1105 panic("ext2_statfs"); 1106 1107 /* 1108 * Compute the overhead (FS structures) 1109 */ 1110 overhead_per_group = 1111 1 /* block bitmap */ + 1112 1 /* inode bitmap */ + 1113 fs->e2fs_itpg; 1114 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1115 fs->e2fs_gcount * overhead_per_group; 1116 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1117 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1118 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1119 if (ext2_cg_has_sb(fs, i)) 1120 ngroups++; 1121 } 1122 } else { 1123 ngroups = fs->e2fs_gcount; 1124 } 1125 ngdb = fs->e2fs_gdbcount; 1126 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1127 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1128 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1129 overhead += ngroups * (1 /* superblock */ + ngdb); 1130 1131 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1132 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1133 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1134 sbp->f_blocks = fs->e2fs_bcount - overhead; 1135 sbp->f_bfree = fs->e2fs_fbcount; 1136 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1137 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1138 sbp->f_ffree = fs->e2fs_ficount; 1139 if (sbp != &mp->mnt_stat) { 1140 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 1141 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 1142 } 1143 return (0); 1144 } 1145 1146 static int 1147 ext2_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 1148 { 1149 struct ext2mount *ump; 1150 struct m_ext2fs *fs; 1151 uint32_t overhead, overhead_per_group, ngdb; 1152 int i, ngroups; 1153 1154 ump = VFSTOEXT2(mp); 1155 fs = ump->um_e2fs; 
1156 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1157 panic("ext2_statfs"); 1158 1159 /* 1160 * Compute the overhead (FS structures) 1161 */ 1162 overhead_per_group = 1163 1 /* block bitmap */ + 1164 1 /* inode bitmap */ + 1165 fs->e2fs_itpg; 1166 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1167 fs->e2fs_gcount * overhead_per_group; 1168 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1169 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1170 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1171 if (ext2_cg_has_sb(fs, i)) 1172 ngroups++; 1173 } 1174 } else { 1175 ngroups = fs->e2fs_gcount; 1176 } 1177 ngdb = fs->e2fs_gdbcount; 1178 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1179 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1180 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1181 overhead += ngroups * (1 /* superblock */ + ngdb); 1182 1183 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1184 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1185 sbp->f_frsize = EXT2_BLOCK_SIZE(fs); 1186 sbp->f_blocks = fs->e2fs_bcount - overhead; 1187 sbp->f_bfree = fs->e2fs_fbcount; 1188 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1189 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1190 sbp->f_ffree = fs->e2fs_ficount; 1191 return (0); 1192 } 1193 1194 static int 1195 ext2_sync_scan(struct mount *mp, struct vnode *vp, void *data) 1196 { 1197 struct scaninfo *info = data; 1198 struct inode *ip; 1199 int error; 1200 1201 ip = VTOI(vp); 1202 if (vp->v_type == VNON || 1203 ((ip->i_flag & 1204 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1205 (RB_EMPTY(&vp->v_rbdirty_tree) || (info->waitfor & MNT_LAZY)))) { 1206 return (0); 1207 } 1208 if ((error = VOP_FSYNC(vp, info->waitfor, 0)) != 0) 1209 info->allerror = error; 1210 return (0); 1211 } 1212 1213 /* 1214 * Go through the disk queues to initiate sandbagged IO; 1215 * go through the inodes to write those that have been modified; 1216 * initiate the writing of the super block if it 
has been modified. 1217 * 1218 * Note: we are always called with the filesystem marked `MPBUSY'. 1219 */ 1220 static int 1221 ext2_sync(struct mount *mp, int waitfor) 1222 { 1223 struct ext2mount *ump = VFSTOEXT2(mp); 1224 struct m_ext2fs *fs; 1225 struct scaninfo scaninfo; 1226 int error; 1227 1228 fs = ump->um_e2fs; 1229 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1230 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1231 } 1232 1233 /* 1234 * Write back each (modified) inode. 1235 */ 1236 scaninfo.allerror = 0; 1237 scaninfo.rescan = 1; 1238 scaninfo.waitfor = waitfor; 1239 while (scaninfo.rescan) { 1240 scaninfo.rescan = 0; 1241 vmntvnodescan(mp, VMSC_GETVP | VMSC_NOWAIT, 1242 NULL, ext2_sync_scan, &scaninfo); 1243 } 1244 1245 /* 1246 * Force stale filesystem control information to be flushed. 1247 */ 1248 if ((waitfor & MNT_LAZY) == 0) { 1249 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1250 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, 0)) != 0) 1251 scaninfo.allerror = error; 1252 vn_unlock(ump->um_devvp); 1253 } 1254 1255 /* 1256 * Write back modified superblock. 1257 */ 1258 if (fs->e2fs_fmod != 0) { 1259 fs->e2fs_fmod = 0; 1260 fs->e2fs->e2fs_wtime = htole32(time_second); 1261 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1262 scaninfo.allerror = error; 1263 } 1264 return (scaninfo.allerror); 1265 } 1266 1267 int 1268 ext2_alloc_vnode(struct mount *mp, ino_t ino, struct vnode **vpp) 1269 { 1270 struct ext2mount *ump; 1271 struct vnode *vp; 1272 struct inode *ip; 1273 int error; 1274 1275 ump = VFSTOEXT2(mp); 1276 /* 1277 * Lock out the creation of new entries in the FFS hash table in 1278 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1279 * may occur! 
1280 */ 1281 if (ext2fs_inode_hash_lock) { 1282 while (ext2fs_inode_hash_lock) { 1283 ext2fs_inode_hash_lock = -1; 1284 tsleep(&ext2fs_inode_hash_lock, 0, "e2vget", 0); 1285 } 1286 return (-1); 1287 } 1288 ext2fs_inode_hash_lock = 1; 1289 1290 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1291 1292 /* Allocate a new vnode/inode. */ 1293 if ((error = getnewvnode(VT_EXT2FS, mp, &vp, VLKTIMEOUT, 1294 LK_CANRECURSE)) != 0) { 1295 if (ext2fs_inode_hash_lock < 0) 1296 wakeup(&ext2fs_inode_hash_lock); 1297 ext2fs_inode_hash_lock = 0; 1298 *vpp = NULL; 1299 free(ip, M_EXT2NODE); 1300 return (error); 1301 } 1302 //lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1303 vp->v_data = ip; 1304 ip->i_vnode = vp; 1305 ip->i_e2fs = ump->um_e2fs; 1306 ip->i_dev = ump->um_dev; 1307 ip->i_ump = ump; 1308 ip->i_number = ino; 1309 ip->i_block_group = ino_to_cg(ip->i_e2fs, ino); 1310 ip->i_next_alloc_block = 0; 1311 ip->i_next_alloc_goal = 0; 1312 1313 /* 1314 * Put it onto its hash chain. Since our vnode is locked, other 1315 * requests for this inode will block if they arrive while we are 1316 * sleeping waiting for old data structures to be purged or for the 1317 * contents of the disk portion of this inode to be read. 1318 */ 1319 if (ext2_ihashins(ip)) { 1320 printf("ext2_alloc_vnode: ihashins collision, retrying inode %ld\n", 1321 (long)ip->i_number); 1322 *vpp = NULL; 1323 vp->v_type = VBAD; 1324 vx_put(vp); 1325 free(ip, M_EXT2NODE); 1326 return (-1); 1327 } 1328 1329 if (ext2fs_inode_hash_lock < 0) 1330 wakeup(&ext2fs_inode_hash_lock); 1331 ext2fs_inode_hash_lock = 0; 1332 *vpp = vp; 1333 1334 return (0); 1335 } 1336 1337 /* 1338 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1339 * in from disk. If it is in core, wait for the lock bit to clear, then 1340 * return the inode locked. Detection and handling of mount points must be 1341 * done by the calling routine. 
1342 */ 1343 static int 1344 ext2_vget(struct mount *mp, struct vnode *dvp, ino_t ino, struct vnode **vpp) 1345 { 1346 struct m_ext2fs *fs; 1347 struct inode *ip; 1348 struct ext2mount *ump; 1349 struct buf *bp; 1350 struct vnode *vp; 1351 unsigned int i, used_blocks; 1352 int error; 1353 1354 ump = VFSTOEXT2(mp); 1355 restart: 1356 if ((*vpp = ext2_ihashget(ump->um_dev, ino)) != NULL) 1357 return (0); 1358 if (ext2_alloc_vnode(mp, ino, &vp) == -1) 1359 goto restart; 1360 ip = VTOI(vp); 1361 fs = ip->i_e2fs; 1362 1363 /* Read in the disk contents for the inode, copy into the inode. */ 1364 if ((error = bread(ump->um_devvp, fsbtodoff(fs, ino_to_fsba(fs, ino)), 1365 (int)fs->e2fs_bsize, &bp)) != 0) { 1366 /* 1367 * The inode does not contain anything useful, so it would 1368 * be misleading to leave it on its hash chain. With mode 1369 * still zero, it will be unlinked and returned to the free 1370 * list by vput(). 1371 */ 1372 vp->v_type = VBAD; 1373 brelse(bp); 1374 vx_put(vp); 1375 *vpp = NULL; 1376 return (error); 1377 } 1378 /* convert ext2 inode to dinode */ 1379 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1380 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1381 if (error) { 1382 brelse(bp); 1383 vx_put(vp); 1384 *vpp = NULL; 1385 return (error); 1386 } 1387 1388 /* 1389 * Now we want to make sure that block pointers for unused 1390 * blocks are zeroed out - ext2_balloc depends on this 1391 * although for regular files and directories only 1392 * 1393 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1394 * out because we could corrupt the extent tree. 
1395 */ 1396 if (!(ip->i_flag & IN_E4EXTENTS) && 1397 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1398 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1399 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1400 ip->i_db[i] = 0; 1401 } 1402 #ifdef EXT2FS_PRINT_EXTENTS 1403 ext2_print_inode(ip); 1404 ext4_ext_print_extent_tree_status(ip); 1405 #endif 1406 bqrelse(bp); 1407 1408 /* 1409 * Initialize the vnode from the inode, check for aliases. 1410 * Note that the underlying vnode may have changed. 1411 */ 1412 if ((error = ext2_vinit(mp, &vp)) != 0) { 1413 vx_put(vp); 1414 *vpp = NULL; 1415 return (error); 1416 } 1417 1418 /* 1419 * Finish inode initialization now that aliasing has been resolved. 1420 */ 1421 vref(ip->i_devvp); 1422 /* 1423 * Set up a generation number for this inode if it does not 1424 * already have one. This should only happen on old filesystems. 1425 */ 1426 if (ip->i_gen == 0) { 1427 ip->i_gen = krandom() / 2 + 1; 1428 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1429 ip->i_flag |= IN_MODIFIED; 1430 } 1431 /* 1432 * Return the locked and refd vnode. 1433 */ 1434 vx_downgrade(vp); /* downgrade VX lock to VN lock */ 1435 *vpp = vp; 1436 1437 return (0); 1438 } 1439 1440 /* 1441 * File handle to vnode 1442 * 1443 * Have to be really careful about stale file handles: 1444 * - check that the inode number is valid 1445 * - call ext2_vget() to get the locked inode 1446 * - check for an unallocated inode (i_mode == 0) 1447 * - check that the given client host has export rights and return 1448 * those rights via. 
exflagsp and credanonp 1449 */ 1450 static int 1451 ext2_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 1452 struct vnode **vpp) 1453 { 1454 struct inode *ip; 1455 struct ufid *ufhp; 1456 struct vnode *nvp; 1457 struct m_ext2fs *fs; 1458 int error; 1459 1460 ufhp = (struct ufid *)fhp; 1461 fs = VFSTOEXT2(mp)->um_e2fs; 1462 if (ufhp->ufid_ino < EXT2_ROOTINO || 1463 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1464 return (ESTALE); 1465 1466 error = VFS_VGET(mp, NULL, ufhp->ufid_ino, &nvp); 1467 if (error) { 1468 *vpp = NULLVP; 1469 return (error); 1470 } 1471 ip = VTOI(nvp); 1472 if (ip->i_mode == 0 || 1473 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1474 vput(nvp); 1475 *vpp = NULLVP; 1476 return (ESTALE); 1477 } 1478 *vpp = nvp; 1479 return (0); 1480 } 1481 1482 /* 1483 * Vnode pointer to File handle 1484 */ 1485 /* ARGSUSED */ 1486 static int 1487 ext2_vptofh(struct vnode *vp, struct fid *fhp) 1488 { 1489 struct inode *ip; 1490 struct ufid *ufhp; 1491 1492 ip = VTOI(vp); 1493 ufhp = (struct ufid *)fhp; 1494 ufhp->ufid_len = sizeof(struct ufid); 1495 ufhp->ufid_ino = ip->i_number; 1496 ufhp->ufid_gen = ip->i_gen; 1497 return (0); 1498 } 1499 1500 /* 1501 * This is the generic part of fhtovp called after the underlying 1502 * filesystem has validated the file handle. 1503 * 1504 * Verify that a host should have access to a filesystem. 1505 */ 1506 static int 1507 ext2_check_export(struct mount *mp, struct sockaddr *nam, int *exflagsp, 1508 struct ucred **credanonp) 1509 { 1510 struct netcred *np; 1511 struct ext2mount *ump; 1512 1513 ump = VFSTOEXT2(mp); 1514 /* 1515 * Get the export permission structure for this <mp, client> tuple. 1516 */ 1517 np = vfs_export_lookup(mp, &ump->um_export, nam); 1518 if (np == NULL) 1519 return (EACCES); 1520 1521 *exflagsp = np->netc_exflags; 1522 *credanonp = &np->netc_anon; 1523 return (0); 1524 } 1525 1526 /* 1527 * Write a superblock and associated information back to disk. 
1528 */ 1529 static int 1530 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1531 { 1532 struct m_ext2fs *fs = mp->um_e2fs; 1533 struct ext2fs *es = fs->e2fs; 1534 struct buf *bp; 1535 int error = 0; 1536 1537 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1538 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1539 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1540 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1541 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1542 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1543 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1544 } 1545 1546 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1547 1548 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1549 ext2_sb_csum_set(fs); 1550 1551 bp = getblk(mp->um_devvp, SBOFF, SBSIZE, 0, 0); 1552 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); 1553 if (waitfor == MNT_WAIT) 1554 error = bwrite(bp); 1555 else 1556 bawrite(bp); 1557 1558 /* 1559 * The buffers for group descriptors, inode bitmaps and block bitmaps 1560 * are not busy at this point and are (hopefully) written by the 1561 * usual sync mechanism. No need to write them here. 
1562 */ 1563 return (error); 1564 } 1565 1566 static int 1567 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1568 { 1569 struct m_ext2fs *fs = mp->um_e2fs; 1570 struct buf *bp; 1571 int i, j, g_count = 0, error = 0, allerror = 0; 1572 1573 allerror = ext2_sbupdate(mp, waitfor); 1574 1575 /* Update gd csums */ 1576 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1577 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1578 ext2_gd_csum_set(fs); 1579 1580 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1581 bp = getblk(mp->um_devvp, fsbtodoff(fs, 1582 ext2_cg_location(fs, i)), 1583 fs->e2fs_bsize, 0, 0); 1584 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1585 memcpy(bp->b_data, &fs->e2fs_gd[ 1586 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1587 fs->e2fs_bsize); 1588 } else { 1589 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1590 g_count < fs->e2fs_gcount; j++, g_count++) 1591 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1592 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1593 } 1594 if (waitfor == MNT_WAIT) 1595 error = bwrite(bp); 1596 else 1597 bawrite(bp); 1598 } 1599 1600 if (!allerror && error) 1601 allerror = error; 1602 return (allerror); 1603 } 1604 1605 /* 1606 * Return the root of a filesystem. 1607 */ 1608 static int 1609 ext2_root(struct mount *mp, struct vnode **vpp) 1610 { 1611 struct vnode *nvp; 1612 int error; 1613 1614 error = VFS_VGET(mp, NULL, (ino_t)EXT2_ROOTINO, &nvp); 1615 if (error) 1616 return (error); 1617 *vpp = nvp; 1618 return (0); 1619 } 1620 1621 /* 1622 * Initialize ext2 filesystems, done only once. 1623 */ 1624 static int 1625 ext2_init(struct vfsconf *vfsp) 1626 { 1627 static int done; 1628 1629 if (done) 1630 return (0); 1631 done = 1; 1632 ext2_ihashinit(); 1633 1634 return (0); 1635 } 1636 1637 static int 1638 ext2_uninit(struct vfsconf *vfsp) 1639 { 1640 1641 ext2_ihashuninit(); 1642 1643 return (0); 1644 } 1645