1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/caps.h> 45 #include <sys/proc.h> 46 #include <sys/kernel.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/bio.h> 50 #include <sys/buf2.h> 51 #include <sys/conf.h> 52 #include <sys/endian.h> 53 #include <sys/fcntl.h> 54 #include <sys/malloc.h> 55 #include <sys/stat.h> 56 #include <sys/mutex2.h> 57 #include <sys/nlookup.h> 58 59 #include <vfs/ext2fs/fs.h> 60 #include <vfs/ext2fs/ext2_mount.h> 61 #include <vfs/ext2fs/inode.h> 62 63 #include <vfs/ext2fs/ext2fs.h> 64 #include <vfs/ext2fs/ext2_dinode.h> 65 #include <vfs/ext2fs/ext2_extern.h> 66 #include <vfs/ext2fs/ext2_extents.h> 67 68 SDT_PROVIDER_DECLARE(ext2fs); 69 /* 70 * ext2fs trace probe: 71 * arg0: verbosity. Higher numbers give more verbose messages 72 * arg1: Textual message 73 */ 74 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 75 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 76 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 77 78 static int ext2_flushfiles(struct mount *mp, int flags); 79 static int ext2_mountfs(struct vnode *, struct mount *); 80 static int ext2_reload(struct mount *mp); 81 static int ext2_sbupdate(struct ext2mount *, int); 82 static int ext2_cgupdate(struct ext2mount *, int); 83 static int ext2_init(struct vfsconf *); 84 static int ext2_uninit(struct vfsconf *); 85 static vfs_unmount_t ext2_unmount; 86 static vfs_root_t ext2_root; 87 static vfs_statfs_t ext2_statfs; 88 static vfs_statvfs_t ext2_statvfs; 89 static vfs_sync_t ext2_sync; 90 static vfs_vget_t ext2_vget; 91 static vfs_fhtovp_t ext2_fhtovp; 92 static vfs_vptofh_t ext2_vptofh; 93 static vfs_checkexp_t ext2_check_export; 94 static vfs_mount_t ext2_mount; 95 96 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 97 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 98 99 static struct vfsops ext2fs_vfsops = { 100 .vfs_flags = 0, 101 .vfs_mount = ext2_mount, 102 .vfs_unmount = ext2_unmount, 103 .vfs_root = ext2_root, /* root inode via vget */ 104 .vfs_statfs = ext2_statfs, 105 .vfs_statvfs = ext2_statvfs, 106 .vfs_sync = ext2_sync, 107 .vfs_vget = ext2_vget, 108 .vfs_fhtovp = ext2_fhtovp, 109 .vfs_vptofh = ext2_vptofh, 110 .vfs_checkexp = ext2_check_export, 111 .vfs_init = ext2_init, 112 .vfs_uninit = ext2_uninit 113 }; 114 115 VFS_SET(ext2fs_vfsops, ext2fs, VFCF_MPSAFE); 116 MODULE_VERSION(ext2fs, 1); 117 118 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 119 int ronly); 120 static int ext2_compute_sb_data(struct vnode * devvp, 121 struct ext2fs * es, struct m_ext2fs * fs); 122 123 static int ext2fs_inode_hash_lock; 124 125 /* 126 * VFS Operations. 127 * 128 * mount system call 129 */ 130 static int 131 ext2_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 132 { 133 struct ext2_args args; 134 struct vnode *devvp; 135 struct ext2mount *ump = NULL; 136 struct m_ext2fs *fs; 137 struct nlookupdata nd; 138 mode_t accmode; 139 int error, flags; 140 size_t size; 141 142 if ((error = copyin(data, (caddr_t)&args, sizeof (struct ext2_args))) != 0) 143 return (error); 144 145 /* 146 * If updating, check whether changing from read-only to 147 * read/write; if there is no device name, that's all we do. 148 */ 149 if (mp->mnt_flag & MNT_UPDATE) { 150 ump = VFSTOEXT2(mp); 151 fs = ump->um_e2fs; 152 devvp = ump->um_devvp; 153 error = 0; 154 if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 155 error = VFS_SYNC(mp, MNT_WAIT); 156 if (error) 157 return (error); 158 flags = WRITECLOSE; 159 if (mp->mnt_flag & MNT_FORCE) 160 flags |= FORCECLOSE; 161 if (vfs_busy(mp, LK_NOWAIT)) 162 return (EBUSY); 163 error = ext2_flushfiles(mp, flags); 164 vfs_unbusy(mp); 165 if (error == 0 && fs->e2fs_wasvalid && 166 ext2_cgupdate(ump, MNT_WAIT) == 0) { 167 fs->e2fs->e2fs_state = 168 htole16((le16toh(fs->e2fs->e2fs_state) | 169 E2FS_ISCLEAN)); 170 ext2_sbupdate(ump, MNT_WAIT); 171 } 172 fs->e2fs_ronly = 1; 173 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 174 VOP_OPEN(devvp, FREAD, FSCRED, NULL); 175 VOP_CLOSE(devvp, FREAD | FWRITE, NULL); 176 vn_unlock(devvp); 177 } 178 if (!error && (mp->mnt_flag & MNT_RELOAD)) 179 error = ext2_reload(mp); 180 if (error) 181 return (error); 182 devvp = ump->um_devvp; 183 if (fs->e2fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 184 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 185 return (EPERM); 186 187 /* 188 * If upgrade to read-write by non-root, then verify 189 * that user has necessary permissions on the device. 190 */ 191 if (cred->cr_uid != 0) { 192 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 193 error = VOP_EACCESS(devvp, VREAD | VWRITE, cred); 194 if (error) { 195 vn_unlock(devvp); 196 return (error); 197 } 198 vn_unlock(devvp); 199 } 200 201 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 202 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 203 if (mp->mnt_flag & MNT_FORCE) { 204 printf( 205 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 206 } else { 207 printf( 208 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 209 fs->e2fs_fsmnt); 210 return (EPERM); 211 } 212 } 213 fs->e2fs->e2fs_state = 214 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 215 (void)ext2_cgupdate(ump, MNT_WAIT); 216 fs->e2fs_ronly = 0; 217 mp->mnt_flag &= ~MNT_RDONLY; 218 219 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 220 VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, NULL); 221 VOP_CLOSE(devvp, FREAD, NULL); 222 vn_unlock(devvp); 223 } 224 if (args.fspec == NULL) { 225 /* 226 * Process export requests. 227 */ 228 return (vfs_export(mp, &ump->um_export, &args.export)); 229 } 230 } 231 232 /* 233 * Not an update, or updating the name: look up the name 234 * and verify that it refers to a sensible disk device. 235 */ 236 devvp = NULL; 237 error = nlookup_init(&nd, args.fspec, UIO_USERSPACE, NLC_FOLLOW); 238 if (error == 0) 239 error = nlookup(&nd); 240 if (error == 0) 241 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 242 nlookup_done(&nd); 243 if (error) 244 return (error); 245 246 if (!vn_isdisk(devvp, &error)) { 247 vrele(devvp); 248 return (error); 249 } 250 251 /* 252 * If mount by non-root, then verify that user has necessary 253 * permissions on the device. 254 * 255 * XXXRW: VOP_ACCESS() enough? 256 */ 257 if (cred->cr_uid != 0) { 258 accmode = VREAD; 259 if ((mp->mnt_flag & MNT_RDONLY) == 0) 260 accmode |= VWRITE; 261 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 262 if ((error = VOP_EACCESS(devvp, accmode, cred)) != 0) { 263 vput(devvp); 264 return (error); 265 } 266 vn_unlock(devvp); 267 } 268 269 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 270 error = ext2_mountfs(devvp, mp); 271 } else { 272 if (devvp != ump->um_devvp) 273 error = EINVAL; /* needs translation */ 274 else 275 vrele(devvp); 276 } 277 if (error) { 278 vrele(devvp); 279 return (error); 280 } 281 ump = VFSTOEXT2(mp); 282 fs = ump->um_e2fs; 283 284 /* 285 * Note that this strncpy() is ok because of a check at the start 286 * of ext2_mount(). 287 */ 288 copyinstr(path, fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt) - 1, &size); 289 bzero(fs->e2fs_fsmnt + size, sizeof(fs->e2fs_fsmnt) - size); 290 copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 291 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 292 ext2_statfs(mp, &mp->mnt_stat, cred); 293 return (0); 294 } 295 296 static int 297 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 298 { 299 uint32_t i, mask; 300 301 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 302 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 303 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 304 return (1); 305 } 306 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 307 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 308 if (mask) { 309 printf("WARNING: mount of %s denied due to " 310 "unsupported optional features:\n", devtoname(dev)); 311 for (i = 0; 312 i < sizeof(incompat)/sizeof(struct ext2_feature); 313 i++) 314 if (mask & incompat[i].mask) 315 printf("%s ", incompat[i].name); 316 printf("\n"); 317 return (1); 318 } 319 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 320 if (!ronly && mask) { 321 printf("WARNING: R/W mount of %s denied due to " 322 "unsupported optional features:\n", devtoname(dev)); 323 for (i = 0; 324 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 325 i++) 326 if (mask & ro_compat[i].mask) 327 printf("%s ", ro_compat[i].name); 328 printf("\n"); 329 return (1); 330 } 331 } 332 return (0); 333 } 334 335 static e4fs_daddr_t 336 ext2_cg_location(struct m_ext2fs *fs, int number) 337 { 338 int cg, descpb, logical_sb, has_super = 0; 339 340 /* 341 * Adjust logical superblock block number. 342 * Godmar thinks: if the blocksize is greater than 1024, then 343 * the superblock is logically part of block zero. 344 */ 345 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1; 346 347 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 348 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 349 return (logical_sb + number + 1); 350 351 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 352 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 353 else 354 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 355 356 cg = descpb * number; 357 358 if (ext2_cg_has_sb(fs, cg)) 359 has_super = 1; 360 361 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 362 le32toh(fs->e2fs->e2fs_first_dblock)); 363 } 364 365 static int 366 ext2_cg_validate(struct m_ext2fs *fs) 367 { 368 uint64_t b_bitmap; 369 uint64_t i_bitmap; 370 uint64_t i_tables; 371 uint64_t first_block, last_block, last_cg_block; 372 struct ext2_gd *gd; 373 unsigned int i, cg_count; 374 375 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 376 last_cg_block = ext2_cg_number_gdb(fs, 0); 377 cg_count = fs->e2fs_gcount; 378 379 for (i = 0; i < fs->e2fs_gcount; i++) { 380 gd = &fs->e2fs_gd[i]; 381 382 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 383 i == fs->e2fs_gcount - 1) { 384 last_block = fs->e2fs_bcount - 1; 385 } else { 386 last_block = first_block + 387 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 388 } 389 390 if ((cg_count == fs->e2fs_gcount) && 391 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 392 cg_count = i; 393 394 b_bitmap = e2fs_gd_get_b_bitmap(gd); 395 if (b_bitmap == 0) { 396 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 397 "block bitmap is zero", i); 398 return (EINVAL); 399 } 400 if (b_bitmap <= last_cg_block) { 401 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 402 "block bitmap overlaps gds", i); 403 return (EINVAL); 404 } 405 if (b_bitmap < first_block || b_bitmap > last_block) { 406 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 407 "block bitmap not in group", i); 408 return (EINVAL); 409 } 410 411 i_bitmap = e2fs_gd_get_i_bitmap(gd); 412 if (i_bitmap == 0) { 413 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 414 "inode bitmap is zero", i); 415 return (EINVAL); 416 } 417 if (i_bitmap <= last_cg_block) { 418 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 419 "inode bitmap overlaps gds", i); 420 return (EINVAL); 421 } 422 if (i_bitmap < first_block || i_bitmap > last_block) { 423 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 424 "inode bitmap not in group blk", i); 425 return (EINVAL); 426 } 427 428 i_tables = e2fs_gd_get_i_tables(gd); 429 if (i_tables == 0) { 430 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 431 "inode table is zero", i); 432 return (EINVAL); 433 } 434 if (i_tables <= last_cg_block) { 435 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 436 "inode tables overlaps gds", i); 437 return (EINVAL); 438 } 439 if (i_tables < first_block || 440 i_tables + fs->e2fs_itpg - 1 > last_block) { 441 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 442 "inode tables not in group blk", i); 443 return (EINVAL); 444 } 445 446 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 447 first_block += EXT2_BLOCKS_PER_GROUP(fs); 448 } 449 450 return (0); 451 } 452 453 /* 454 * This computes the fields of the m_ext2fs structure from the 455 * data in the ext2fs structure read in. 456 */ 457 static int 458 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 459 struct m_ext2fs *fs) 460 { 461 struct buf *bp; 462 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 463 int i, j; 464 int g_count = 0; 465 int error; 466 467 /* Check checksum features */ 468 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 469 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 470 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 471 "incorrect checksum features combination"); 472 return (EINVAL); 473 } 474 475 /* Precompute checksum seed for all metadata */ 476 ext2_sb_csum_set_seed(fs); 477 478 /* Verify sb csum if possible */ 479 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 480 error = ext2_sb_csum_verify(fs); 481 if (error) { 482 return (error); 483 } 484 } 485 486 /* Check for block size = 1K|2K|4K */ 487 if (le32toh(es->e2fs_log_bsize) > 2) { 488 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 489 "bad block size"); 490 return (EINVAL); 491 } 492 493 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 494 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 495 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 496 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 497 498 /* Check for fragment size */ 499 if (le32toh(es->e2fs_log_fsize) > 500 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 501 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 502 "invalid log cluster size"); 503 return (EINVAL); 504 } 505 506 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 507 if (fs->e2fs_fsize != fs->e2fs_bsize) { 508 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 509 "fragment size != block size"); 510 return (EINVAL); 511 } 512 513 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 514 515 /* Check reserved gdt blocks for future filesystem expansion */ 516 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 517 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 518 "number of reserved GDT blocks too large"); 519 return (EINVAL); 520 } 521 522 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 523 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 524 } else { 525 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 526 527 /* 528 * Check first ino. 529 */ 530 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 531 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 532 "invalid first ino"); 533 return (EINVAL); 534 } 535 536 /* 537 * Simple sanity check for superblock inode size value. 538 */ 539 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 540 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 541 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 542 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 543 "invalid inode size"); 544 return (EINVAL); 545 } 546 } 547 548 /* Check group descriptors */ 549 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 550 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 551 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 552 "unsupported 64bit descriptor size"); 553 return (EINVAL); 554 } 555 556 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 557 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 558 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 559 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 560 "zero blocks/fragments per group"); 561 return (EINVAL); 562 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 563 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 564 "blocks per group not equal fragments per group"); 565 return (EINVAL); 566 } 567 568 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 569 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 570 "non-standard group size unsupported"); 571 return (EINVAL); 572 } 573 574 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 575 if (fs->e2fs_ipb == 0 || 576 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 577 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 578 "bad inodes per block size"); 579 return (EINVAL); 580 } 581 582 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 583 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 584 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 585 "invalid inodes per group"); 586 return (EINVAL); 587 } 588 589 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 590 591 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 592 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 593 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 594 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 595 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 596 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 597 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 598 } 599 if (fs->e2fs_rbcount > fs->e2fs_bcount || 600 fs->e2fs_fbcount > fs->e2fs_bcount) { 601 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 602 "invalid block count"); 603 return (EINVAL); 604 } 605 606 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 607 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 608 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 609 "invalid number of free inodes"); 610 return (EINVAL); 611 } 612 613 if (le32toh(es->e2fs_first_dblock) != (fs->e2fs_bsize > 1024 ? 0 : 1) || 614 le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 615 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 616 "first data block out of range"); 617 return (EINVAL); 618 } 619 620 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 621 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 622 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 623 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 624 "groups count too large"); 625 return (EINVAL); 626 } 627 628 /* Check for extra isize in big inodes. */ 629 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 630 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 631 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 632 "no space for extra inode timestamps"); 633 return (EINVAL); 634 } 635 636 /* s_resuid / s_resgid ? */ 637 638 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 639 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 640 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 641 } else { 642 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 643 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 644 fs->e2fs_bsize / sizeof(struct ext2_gd)); 645 } 646 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 647 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 648 M_EXT2MNT, M_WAITOK | M_ZERO); 649 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 650 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 651 652 for (i = 0; i < fs->e2fs_gdbcount; i++) { 653 error = bread(devvp, fsbtodoff(fs, ext2_cg_location(fs, i)), 654 fs->e2fs_bsize, &bp); 655 if (error) { 656 /* 657 * fs->e2fs_gd and fs->e2fs_contigdirs 658 * will be freed later by the caller, 659 * because this function could be called from 660 * MNT_UPDATE path. 661 */ 662 brelse(bp); 663 return (error); 664 } 665 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 666 memcpy(&fs->e2fs_gd[ 667 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 668 bp->b_data, fs->e2fs_bsize); 669 } else { 670 for (j = 0; j < e2fs_descpb && 671 g_count < fs->e2fs_gcount; j++, g_count++) 672 memcpy(&fs->e2fs_gd[g_count], 673 bp->b_data + j * E2FS_REV0_GD_SIZE, 674 E2FS_REV0_GD_SIZE); 675 } 676 brelse(bp); 677 bp = NULL; 678 } 679 680 /* Validate cgs consistency */ 681 error = ext2_cg_validate(fs); 682 if (error) 683 return (error); 684 685 /* Verfy cgs csum */ 686 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 687 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 688 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 689 if (error) 690 return (error); 691 } 692 /* Initialization for the ext2 Orlov allocator variant. */ 693 fs->e2fs_total_dir = 0; 694 for (i = 0; i < fs->e2fs_gcount; i++) 695 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 696 697 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 698 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 699 fs->e2fs_maxfilesize = 0x7fffffff; 700 else { 701 fs->e2fs_maxfilesize = 0xffffffffffff; 702 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 703 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 704 } 705 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 706 fs->e2fs_uhash = 3; 707 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 708 #ifdef __CHAR_UNSIGNED__ 709 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 710 fs->e2fs_uhash = 3; 711 #else 712 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 713 #endif 714 } 715 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 716 error = ext2_sb_csum_verify(fs); 717 718 return (error); 719 } 720 721 struct scaninfo { 722 int rescan; 723 int allerror; 724 int waitfor; 725 struct vnode *devvp; 726 struct m_ext2fs *fs; 727 }; 728 729 static int 730 ext2_reload_scan(struct mount *mp, struct vnode *vp, void *data) 731 { 732 struct scaninfo *info = data; 733 struct inode *ip; 734 struct buf *bp; 735 int error; 736 737 /* 738 * Try to recycle 739 */ 740 if (vrecycle(vp)) 741 return (0); 742 743 /* 744 * Step 1: invalidate all cached file data. 745 */ 746 if (vinvalbuf(vp, 0, 0, 0)) 747 panic("ext2_reload: dirty2"); 748 /* 749 * Step 2: re-read inode data for all active vnodes. 750 */ 751 ip = VTOI(vp); 752 error = bread(info->devvp, 753 fsbtodoff(info->fs, ino_to_fsba(info->fs, ip->i_number)), 754 (int)info->fs->e2fs_bsize, &bp); 755 if (error) { 756 brelse(bp); 757 return (error); 758 } 759 760 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 761 EXT2_INODE_SIZE(info->fs) * ino_to_fsbo(info->fs, ip->i_number)), 762 ip); 763 764 brelse(bp); 765 return (error); 766 } 767 768 /* 769 * Reload all incore data for a filesystem (used after running fsck on 770 * the root filesystem and finding things to fix). The filesystem must 771 * be mounted read-only. 772 * 773 * Things to do to update the mount: 774 * 1) invalidate all cached meta-data. 775 * 2) re-read superblock from disk. 776 * 3) invalidate all cluster summary information. 777 * 4) invalidate all inactive vnodes. 778 * 5) invalidate all cached file data. 779 * 6) re-read inode data for all active vnodes. 780 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 781 */ 782 static int 783 ext2_reload(struct mount *mp) 784 { 785 struct vnode *devvp; 786 struct buf *bp; 787 struct ext2fs *es; 788 struct m_ext2fs *fs; 789 struct csum *sump; 790 struct scaninfo scaninfo; 791 int error, i; 792 int32_t *lp; 793 794 if ((mp->mnt_flag & MNT_RDONLY) == 0) 795 return (EINVAL); 796 /* 797 * Step 1: invalidate all cached meta-data. 798 */ 799 devvp = VFSTOEXT2(mp)->um_devvp; 800 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 801 if (vinvalbuf(devvp, 0, 0, 0) != 0) 802 panic("ext2_reload: dirty1"); 803 vn_unlock(devvp); 804 805 /* 806 * Step 2: re-read superblock from disk. 807 * constants have been adjusted for ext2 808 */ 809 if ((error = bread(devvp, SBOFF, SBSIZE, &bp)) != 0) { 810 brelse(bp); 811 return (error); 812 } 813 es = (struct ext2fs *)bp->b_data; 814 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 815 brelse(bp); 816 return (EIO); /* XXX needs translation */ 817 } 818 fs = VFSTOEXT2(mp)->um_e2fs; 819 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 820 821 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 822 brelse(bp); 823 return (error); 824 } 825 #ifdef UNKLAR 826 if (fs->fs_sbsize < SBSIZE) 827 bp->b_flags |= B_INVAL; 828 #endif 829 brelse(bp); 830 831 /* 832 * Step 3: invalidate all cluster summary information. 833 */ 834 if (fs->e2fs_contigsumsize > 0) { 835 lp = fs->e2fs_maxcluster; 836 sump = fs->e2fs_clustersum; 837 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 838 *lp++ = fs->e2fs_contigsumsize; 839 sump->cs_init = 0; 840 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 841 } 842 } 843 844 scaninfo.rescan = 1; 845 scaninfo.devvp = devvp; 846 scaninfo.fs = fs; 847 while (error == 0 && scaninfo.rescan) { 848 scaninfo.rescan = 0; 849 error = vmntvnodescan(mp, VMSC_GETVX, NULL, ext2_reload_scan, 850 &scaninfo); 851 } 852 return (error); 853 } 854 855 /* 856 * Common code for mount and mountroot. 857 */ 858 static int 859 ext2_mountfs(struct vnode *devvp, struct mount *mp) 860 { 861 struct ext2mount *ump; 862 struct buf *bp; 863 struct m_ext2fs *fs; 864 struct ext2fs *es; 865 struct cdev *dev = devvp->v_rdev; 866 struct csum *sump; 867 int error; 868 int ronly; 869 int i; 870 u_long size; 871 int32_t *lp; 872 int32_t e2fs_maxcontig; 873 874 /* 875 * Disallow multiple mounts of the same device. 876 * Disallow mounting of a device that is currently in use 877 * (except for root, which might share swap device for miniroot). 878 * Flush out any old buffers remaining from a previous use. 879 */ 880 if ((error = vfs_mountedon(devvp)) != 0) 881 return (error); 882 if (vcount(devvp) > 0) 883 return (EBUSY); 884 if ((error = vinvalbuf(devvp, V_SAVE, 0, 0)) != 0) 885 return (error); 886 #ifdef READONLY 887 /* Turn on this to force it to be read-only. */ 888 mp->mnt_flag |= MNT_RDONLY; 889 #endif 890 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 891 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 892 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, NULL); 893 vn_unlock(devvp); 894 if (error) 895 return (error); 896 897 if (devvp->v_rdev->si_iosize_max != 0) 898 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 899 if (mp->mnt_iosize_max > MAXPHYS) 900 mp->mnt_iosize_max = MAXPHYS; 901 902 bp = NULL; 903 ump = NULL; 904 if ((error = bread(devvp, SBOFF, SBSIZE, &bp)) != 0) 905 goto out; 906 es = (struct ext2fs *)bp->b_data; 907 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 908 error = EINVAL; /* XXX needs translation */ 909 goto out; 910 } 911 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 912 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 913 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 914 printf( 915 "WARNING: Filesystem was not properly dismounted\n"); 916 } else { 917 printf( 918 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 919 error = EPERM; 920 goto out; 921 } 922 } 923 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 924 925 /* 926 * I don't know whether this is the right strategy. Note that 927 * we dynamically allocate both an m_ext2fs and an ext2fs 928 * while Linux keeps the super block in a locked buffer. 929 */ 930 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 931 M_EXT2MNT, M_WAITOK | M_ZERO); 932 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 933 M_EXT2MNT, M_WAITOK); 934 mtx_init(EXT2_MTX(ump), "EXT2FS Lock"); 935 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 936 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 937 goto out; 938 939 /* 940 * Calculate the maximum contiguous blocks and size of cluster summary 941 * array. In FFS this is done by newfs; however, the superblock 942 * in ext2fs doesn't have these variables, so we can calculate 943 * them here. 944 */ 945 e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize); 946 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 947 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 948 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 949 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 950 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 951 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 952 lp = ump->um_e2fs->e2fs_maxcluster; 953 sump = ump->um_e2fs->e2fs_clustersum; 954 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 955 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 956 sump->cs_init = 0; 957 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 958 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 959 } 960 } 961 962 brelse(bp); 963 bp = NULL; 964 fs = ump->um_e2fs; 965 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 966 967 /* 968 * If the fs is not mounted read-only, make sure the super block is 969 * always written back on a sync(). 970 */ 971 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 972 if (ronly == 0) { 973 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 974 fs->e2fs->e2fs_state = 975 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 976 } 977 mp->mnt_data = (qaddr_t)ump; 978 mp->mnt_stat.f_fsid.val[0] = devid_from_dev(dev); 979 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 980 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 981 mp->mnt_flag |= MNT_LOCAL; 982 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 983 ump->um_mountp = mp; 984 ump->um_dev = dev; 985 ump->um_devvp = devvp; 986 987 /* 988 * Setting those two parameters allowed us to use 989 * ufs_bmap w/o changse! 990 */ 991 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 992 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 993 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 994 dev->si_mountpoint = mp; 995 996 vfs_add_vnodeops(mp, &ext2_vnodeops, &mp->mnt_vn_norm_ops); 997 vfs_add_vnodeops(mp, &ext2_specops, &mp->mnt_vn_spec_ops); 998 vfs_add_vnodeops(mp, &ext2_fifoops, &mp->mnt_vn_fifo_ops); 999 1000 if (ronly == 0) 1001 ext2_sbupdate(ump, MNT_WAIT); 1002 return (0); 1003 out: 1004 if (bp) 1005 brelse(bp); 1006 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1007 VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1008 vn_unlock(devvp); 1009 if (ump) { 1010 mtx_uninit(EXT2_MTX(ump)); 1011 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 1012 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 1013 free(ump->um_e2fs->e2fs, M_EXT2MNT); 1014 free(ump->um_e2fs, M_EXT2MNT); 1015 free(ump, M_EXT2MNT); 1016 mp->mnt_data = NULL; 1017 } 1018 return (error); 1019 } 1020 1021 /* 1022 * Unmount system call. 1023 */ 1024 static int 1025 ext2_unmount(struct mount *mp, int mntflags) 1026 { 1027 struct ext2mount *ump; 1028 struct m_ext2fs *fs; 1029 struct csum *sump; 1030 int error, flags, i, ronly; 1031 1032 flags = 0; 1033 if (mntflags & MNT_FORCE) { 1034 if (mp->mnt_flag & MNT_ROOTFS) 1035 return (EINVAL); 1036 flags |= FORCECLOSE; 1037 } 1038 if ((error = ext2_flushfiles(mp, flags)) != 0) 1039 return (error); 1040 ump = VFSTOEXT2(mp); 1041 fs = ump->um_e2fs; 1042 ronly = fs->e2fs_ronly; 1043 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1044 if (fs->e2fs_wasvalid) 1045 fs->e2fs->e2fs_state = 1046 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1047 ext2_sbupdate(ump, MNT_WAIT); 1048 } 1049 1050 ump->um_devvp->v_rdev->si_mountpoint = NULL; 1051 1052 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1053 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1054 vn_unlock(ump->um_devvp); 1055 1056 vrele(ump->um_devvp); 1057 sump = fs->e2fs_clustersum; 1058 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1059 free(sump->cs_sum, M_EXT2MNT); 1060 free(fs->e2fs_clustersum, M_EXT2MNT); 1061 free(fs->e2fs_maxcluster, M_EXT2MNT); 1062 free(fs->e2fs_gd, M_EXT2MNT); 1063 free(fs->e2fs_contigdirs, M_EXT2MNT); 1064 free(fs->e2fs, M_EXT2MNT); 1065 free(fs, M_EXT2MNT); 1066 free(ump, M_EXT2MNT); 1067 mp->mnt_data = NULL; 1068 mp->mnt_flag &= ~MNT_LOCAL; 1069 return (error); 1070 } 1071 1072 /* 1073 * Flush out all the files in a filesystem. 1074 */ 1075 static int 1076 ext2_flushfiles(struct mount *mp, int flags) 1077 { 1078 int error; 1079 1080 error = vflush(mp, 0, flags); 1081 return (error); 1082 } 1083 1084 /* 1085 * Get filesystem statistics. 1086 */ 1087 static int 1088 ext2_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 1089 { 1090 struct ext2mount *ump; 1091 struct m_ext2fs *fs; 1092 uint32_t overhead, overhead_per_group, ngdb; 1093 int i, ngroups; 1094 1095 ump = VFSTOEXT2(mp); 1096 fs = ump->um_e2fs; 1097 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1098 panic("ext2_statfs"); 1099 1100 /* 1101 * Compute the overhead (FS structures) 1102 */ 1103 overhead_per_group = 1104 1 /* block bitmap */ + 1105 1 /* inode bitmap */ + 1106 fs->e2fs_itpg; 1107 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1108 fs->e2fs_gcount * overhead_per_group; 1109 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1110 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1111 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1112 if (ext2_cg_has_sb(fs, i)) 1113 ngroups++; 1114 } 1115 } else { 1116 ngroups = fs->e2fs_gcount; 1117 } 1118 ngdb = fs->e2fs_gdbcount; 1119 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1120 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1121 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1122 overhead += ngroups * (1 /* superblock */ + ngdb); 1123 1124 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1125 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1126 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1127 sbp->f_blocks = fs->e2fs_bcount - overhead; 1128 sbp->f_bfree = fs->e2fs_fbcount; 1129 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1130 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1131 sbp->f_ffree = fs->e2fs_ficount; 1132 if (sbp != &mp->mnt_stat) { 1133 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 1134 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 1135 } 1136 return (0); 1137 } 1138 1139 static int 1140 ext2_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 1141 { 1142 struct ext2mount *ump; 1143 struct m_ext2fs *fs; 1144 uint32_t overhead, overhead_per_group, ngdb; 1145 int i, ngroups; 1146 1147 ump = VFSTOEXT2(mp); 1148 fs = ump->um_e2fs; 1149 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1150 panic("ext2_statfs"); 1151 1152 /* 1153 * Compute the overhead (FS structures) 1154 */ 1155 overhead_per_group = 1156 1 /* block bitmap */ + 1157 1 /* inode bitmap */ + 1158 fs->e2fs_itpg; 1159 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1160 fs->e2fs_gcount * overhead_per_group; 1161 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1162 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1163 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1164 if (ext2_cg_has_sb(fs, i)) 1165 ngroups++; 1166 } 1167 } else { 1168 ngroups = fs->e2fs_gcount; 1169 } 1170 ngdb = fs->e2fs_gdbcount; 1171 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1172 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1173 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1174 overhead += ngroups * (1 /* superblock */ + ngdb); 1175 1176 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1177 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1178 sbp->f_frsize = EXT2_BLOCK_SIZE(fs); 1179 sbp->f_blocks = fs->e2fs_bcount - overhead; 1180 sbp->f_bfree = fs->e2fs_fbcount; 1181 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1182 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1183 sbp->f_ffree = fs->e2fs_ficount; 1184 return (0); 1185 } 1186 1187 static int 1188 ext2_sync_scan(struct mount *mp, struct vnode *vp, void *data) 1189 { 1190 struct scaninfo *info = data; 1191 struct inode *ip; 1192 int error; 1193 1194 ip = VTOI(vp); 1195 if (vp->v_type == VNON || 1196 ((ip->i_flag & 1197 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1198 (RB_EMPTY(&vp->v_rbdirty_tree) || (info->waitfor & MNT_LAZY)))) { 1199 return (0); 1200 } 1201 if ((error = VOP_FSYNC(vp, info->waitfor, 0)) != 0) 1202 info->allerror = error; 1203 return (0); 1204 } 1205 1206 /* 1207 * Go through the disk queues to initiate sandbagged IO; 1208 * go through the inodes to write those that have been modified; 1209 * initiate the writing of the super block if it has been modified. 1210 * 1211 * Note: we are always called with the filesystem marked `MPBUSY'. 1212 */ 1213 static int 1214 ext2_sync(struct mount *mp, int waitfor) 1215 { 1216 struct ext2mount *ump = VFSTOEXT2(mp); 1217 struct m_ext2fs *fs; 1218 struct scaninfo scaninfo; 1219 int error; 1220 1221 fs = ump->um_e2fs; 1222 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1223 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1224 } 1225 1226 /* 1227 * Write back each (modified) inode. 1228 */ 1229 scaninfo.allerror = 0; 1230 scaninfo.rescan = 1; 1231 scaninfo.waitfor = waitfor; 1232 while (scaninfo.rescan) { 1233 scaninfo.rescan = 0; 1234 vmntvnodescan(mp, VMSC_GETVP | VMSC_NOWAIT, 1235 NULL, ext2_sync_scan, &scaninfo); 1236 } 1237 1238 /* 1239 * Force stale filesystem control information to be flushed. 1240 */ 1241 if ((waitfor & MNT_LAZY) == 0) { 1242 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1243 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, 0)) != 0) 1244 scaninfo.allerror = error; 1245 vn_unlock(ump->um_devvp); 1246 } 1247 1248 /* 1249 * Write back modified superblock. 1250 */ 1251 if (fs->e2fs_fmod != 0) { 1252 fs->e2fs_fmod = 0; 1253 fs->e2fs->e2fs_wtime = htole32(time_second); 1254 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1255 scaninfo.allerror = error; 1256 } 1257 return (scaninfo.allerror); 1258 } 1259 1260 int 1261 ext2_alloc_vnode(struct mount *mp, ino_t ino, struct vnode **vpp) 1262 { 1263 struct ext2mount *ump; 1264 struct vnode *vp; 1265 struct inode *ip; 1266 int error; 1267 1268 ump = VFSTOEXT2(mp); 1269 /* 1270 * Lock out the creation of new entries in the FFS hash table in 1271 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1272 * may occur! 1273 */ 1274 if (ext2fs_inode_hash_lock) { 1275 while (ext2fs_inode_hash_lock) { 1276 ext2fs_inode_hash_lock = -1; 1277 tsleep(&ext2fs_inode_hash_lock, 0, "e2vget", 0); 1278 } 1279 return (-1); 1280 } 1281 ext2fs_inode_hash_lock = 1; 1282 1283 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1284 1285 /* Allocate a new vnode/inode. */ 1286 if ((error = getnewvnode(VT_EXT2FS, mp, &vp, VLKTIMEOUT, 1287 LK_CANRECURSE)) != 0) { 1288 if (ext2fs_inode_hash_lock < 0) 1289 wakeup(&ext2fs_inode_hash_lock); 1290 ext2fs_inode_hash_lock = 0; 1291 *vpp = NULL; 1292 free(ip, M_EXT2NODE); 1293 return (error); 1294 } 1295 //lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1296 vp->v_data = ip; 1297 ip->i_vnode = vp; 1298 ip->i_e2fs = ump->um_e2fs; 1299 ip->i_dev = ump->um_dev; 1300 ip->i_ump = ump; 1301 ip->i_number = ino; 1302 ip->i_block_group = ino_to_cg(ip->i_e2fs, ino); 1303 ip->i_next_alloc_block = 0; 1304 ip->i_next_alloc_goal = 0; 1305 1306 /* 1307 * Put it onto its hash chain. Since our vnode is locked, other 1308 * requests for this inode will block if they arrive while we are 1309 * sleeping waiting for old data structures to be purged or for the 1310 * contents of the disk portion of this inode to be read. 1311 */ 1312 if (ext2_ihashins(ip)) { 1313 printf("ext2_alloc_vnode: ihashins collision, retrying inode %ld\n", 1314 (long)ip->i_number); 1315 *vpp = NULL; 1316 vp->v_type = VBAD; 1317 vx_put(vp); 1318 free(ip, M_EXT2NODE); 1319 return (-1); 1320 } 1321 1322 if (ext2fs_inode_hash_lock < 0) 1323 wakeup(&ext2fs_inode_hash_lock); 1324 ext2fs_inode_hash_lock = 0; 1325 *vpp = vp; 1326 1327 return (0); 1328 } 1329 1330 /* 1331 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1332 * in from disk. If it is in core, wait for the lock bit to clear, then 1333 * return the inode locked. Detection and handling of mount points must be 1334 * done by the calling routine. 1335 */ 1336 static int 1337 ext2_vget(struct mount *mp, struct vnode *dvp, ino_t ino, struct vnode **vpp) 1338 { 1339 struct m_ext2fs *fs; 1340 struct inode *ip; 1341 struct ext2mount *ump; 1342 struct buf *bp; 1343 struct vnode *vp; 1344 unsigned int i, used_blocks; 1345 int error; 1346 1347 ump = VFSTOEXT2(mp); 1348 restart: 1349 if ((*vpp = ext2_ihashget(ump->um_dev, ino)) != NULL) 1350 return (0); 1351 if (ext2_alloc_vnode(mp, ino, &vp) == -1) 1352 goto restart; 1353 ip = VTOI(vp); 1354 fs = ip->i_e2fs; 1355 1356 /* Read in the disk contents for the inode, copy into the inode. */ 1357 if ((error = bread(ump->um_devvp, fsbtodoff(fs, ino_to_fsba(fs, ino)), 1358 (int)fs->e2fs_bsize, &bp)) != 0) { 1359 /* 1360 * The inode does not contain anything useful, so it would 1361 * be misleading to leave it on its hash chain. With mode 1362 * still zero, it will be unlinked and returned to the free 1363 * list by vput(). 1364 */ 1365 vp->v_type = VBAD; 1366 brelse(bp); 1367 vx_put(vp); 1368 *vpp = NULL; 1369 return (error); 1370 } 1371 /* convert ext2 inode to dinode */ 1372 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1373 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1374 if (error) { 1375 brelse(bp); 1376 vx_put(vp); 1377 *vpp = NULL; 1378 return (error); 1379 } 1380 1381 /* 1382 * Now we want to make sure that block pointers for unused 1383 * blocks are zeroed out - ext2_balloc depends on this 1384 * although for regular files and directories only 1385 * 1386 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1387 * out because we could corrupt the extent tree. 1388 */ 1389 if (!(ip->i_flag & IN_E4EXTENTS) && 1390 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1391 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1392 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1393 ip->i_db[i] = 0; 1394 } 1395 #ifdef EXT2FS_PRINT_EXTENTS 1396 ext2_print_inode(ip); 1397 ext4_ext_print_extent_tree_status(ip); 1398 #endif 1399 bqrelse(bp); 1400 1401 /* 1402 * Initialize the vnode from the inode, check for aliases. 1403 * Note that the underlying vnode may have changed. 1404 */ 1405 if ((error = ext2_vinit(mp, &vp)) != 0) { 1406 vx_put(vp); 1407 *vpp = NULL; 1408 return (error); 1409 } 1410 1411 /* 1412 * Finish inode initialization now that aliasing has been resolved. 1413 */ 1414 vref(ip->i_devvp); 1415 /* 1416 * Set up a generation number for this inode if it does not 1417 * already have one. This should only happen on old filesystems. 1418 */ 1419 if (ip->i_gen == 0) { 1420 ip->i_gen = krandom() / 2 + 1; 1421 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1422 ip->i_flag |= IN_MODIFIED; 1423 } 1424 /* 1425 * Return the locked and refd vnode. 1426 */ 1427 vx_downgrade(vp); /* downgrade VX lock to VN lock */ 1428 *vpp = vp; 1429 1430 return (0); 1431 } 1432 1433 /* 1434 * File handle to vnode 1435 * 1436 * Have to be really careful about stale file handles: 1437 * - check that the inode number is valid 1438 * - call ext2_vget() to get the locked inode 1439 * - check for an unallocated inode (i_mode == 0) 1440 * - check that the given client host has export rights and return 1441 * those rights via. exflagsp and credanonp 1442 */ 1443 static int 1444 ext2_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 1445 struct vnode **vpp) 1446 { 1447 struct inode *ip; 1448 struct ufid *ufhp; 1449 struct vnode *nvp; 1450 struct m_ext2fs *fs; 1451 int error; 1452 1453 ufhp = (struct ufid *)fhp; 1454 fs = VFSTOEXT2(mp)->um_e2fs; 1455 if (ufhp->ufid_ino < EXT2_ROOTINO || 1456 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1457 return (ESTALE); 1458 1459 error = VFS_VGET(mp, NULL, ufhp->ufid_ino, &nvp); 1460 if (error) { 1461 *vpp = NULLVP; 1462 return (error); 1463 } 1464 ip = VTOI(nvp); 1465 if (ip->i_mode == 0 || 1466 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1467 vput(nvp); 1468 *vpp = NULLVP; 1469 return (ESTALE); 1470 } 1471 *vpp = nvp; 1472 return (0); 1473 } 1474 1475 /* 1476 * Vnode pointer to File handle 1477 */ 1478 /* ARGSUSED */ 1479 static int 1480 ext2_vptofh(struct vnode *vp, struct fid *fhp) 1481 { 1482 struct inode *ip; 1483 struct ufid *ufhp; 1484 1485 ip = VTOI(vp); 1486 ufhp = (struct ufid *)fhp; 1487 ufhp->ufid_len = sizeof(struct ufid); 1488 ufhp->ufid_ino = ip->i_number; 1489 ufhp->ufid_gen = ip->i_gen; 1490 return (0); 1491 } 1492 1493 /* 1494 * This is the generic part of fhtovp called after the underlying 1495 * filesystem has validated the file handle. 1496 * 1497 * Verify that a host should have access to a filesystem. 1498 */ 1499 static int 1500 ext2_check_export(struct mount *mp, struct sockaddr *nam, int *exflagsp, 1501 struct ucred **credanonp) 1502 { 1503 struct netcred *np; 1504 struct ext2mount *ump; 1505 1506 ump = VFSTOEXT2(mp); 1507 /* 1508 * Get the export permission structure for this <mp, client> tuple. 1509 */ 1510 np = vfs_export_lookup(mp, &ump->um_export, nam); 1511 if (np == NULL) 1512 return (EACCES); 1513 1514 *exflagsp = np->netc_exflags; 1515 *credanonp = &np->netc_anon; 1516 return (0); 1517 } 1518 1519 /* 1520 * Write a superblock and associated information back to disk. 1521 */ 1522 static int 1523 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1524 { 1525 struct m_ext2fs *fs = mp->um_e2fs; 1526 struct ext2fs *es = fs->e2fs; 1527 struct buf *bp; 1528 int error = 0; 1529 1530 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1531 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1532 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1533 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1534 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1535 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1536 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1537 } 1538 1539 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1540 1541 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1542 ext2_sb_csum_set(fs); 1543 1544 bp = getblk(mp->um_devvp, SBOFF, SBSIZE, 0, 0); 1545 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); 1546 if (waitfor == MNT_WAIT) 1547 error = bwrite(bp); 1548 else 1549 bawrite(bp); 1550 1551 /* 1552 * The buffers for group descriptors, inode bitmaps and block bitmaps 1553 * are not busy at this point and are (hopefully) written by the 1554 * usual sync mechanism. No need to write them here. 1555 */ 1556 return (error); 1557 } 1558 1559 static int 1560 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1561 { 1562 struct m_ext2fs *fs = mp->um_e2fs; 1563 struct buf *bp; 1564 int i, j, g_count = 0, error = 0, allerror = 0; 1565 1566 allerror = ext2_sbupdate(mp, waitfor); 1567 1568 /* Update gd csums */ 1569 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1570 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1571 ext2_gd_csum_set(fs); 1572 1573 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1574 bp = getblk(mp->um_devvp, fsbtodoff(fs, 1575 ext2_cg_location(fs, i)), 1576 fs->e2fs_bsize, 0, 0); 1577 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1578 memcpy(bp->b_data, &fs->e2fs_gd[ 1579 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1580 fs->e2fs_bsize); 1581 } else { 1582 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1583 g_count < fs->e2fs_gcount; j++, g_count++) 1584 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1585 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1586 } 1587 if (waitfor == MNT_WAIT) 1588 error = bwrite(bp); 1589 else 1590 bawrite(bp); 1591 } 1592 1593 if (!allerror && error) 1594 allerror = error; 1595 return (allerror); 1596 } 1597 1598 /* 1599 * Return the root of a filesystem. 1600 */ 1601 static int 1602 ext2_root(struct mount *mp, struct vnode **vpp) 1603 { 1604 struct vnode *nvp; 1605 int error; 1606 1607 error = VFS_VGET(mp, NULL, (ino_t)EXT2_ROOTINO, &nvp); 1608 if (error) 1609 return (error); 1610 *vpp = nvp; 1611 return (0); 1612 } 1613 1614 /* 1615 * Initialize ext2 filesystems, done only once. 1616 */ 1617 static int 1618 ext2_init(struct vfsconf *vfsp) 1619 { 1620 static int done; 1621 1622 if (done) 1623 return (0); 1624 done = 1; 1625 ext2_ihashinit(); 1626 1627 return (0); 1628 } 1629 1630 static int 1631 ext2_uninit(struct vfsconf *vfsp) 1632 { 1633 1634 ext2_ihashuninit(); 1635 1636 return (0); 1637 } 1638