1 /*- 2 * modified for EXT2FS support in Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1989, 1991, 1993, 1994 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/namei.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/kernel.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/bio.h> 50 #include <sys/buf2.h> 51 #include <sys/conf.h> 52 #include <sys/endian.h> 53 #include <sys/fcntl.h> 54 #include <sys/malloc.h> 55 #include <sys/stat.h> 56 #include <sys/mutex2.h> 57 #include <sys/nlookup.h> 58 59 #include <vfs/ext2fs/fs.h> 60 #include <vfs/ext2fs/ext2_mount.h> 61 #include <vfs/ext2fs/inode.h> 62 63 #include <vfs/ext2fs/ext2fs.h> 64 #include <vfs/ext2fs/ext2_dinode.h> 65 #include <vfs/ext2fs/ext2_extern.h> 66 #include <vfs/ext2fs/ext2_extents.h> 67 68 SDT_PROVIDER_DECLARE(ext2fs); 69 /* 70 * ext2fs trace probe: 71 * arg0: verbosity. Higher numbers give more verbose messages 72 * arg1: Textual message 73 */ 74 SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); 75 SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); 76 SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); 77 78 static int ext2_flushfiles(struct mount *mp, int flags); 79 static int ext2_mountfs(struct vnode *, struct mount *); 80 static int ext2_reload(struct mount *mp); 81 static int ext2_sbupdate(struct ext2mount *, int); 82 static int ext2_cgupdate(struct ext2mount *, int); 83 static int ext2_init(struct vfsconf *); 84 static int ext2_uninit(struct vfsconf *); 85 static vfs_unmount_t ext2_unmount; 86 static vfs_root_t ext2_root; 87 static vfs_statfs_t ext2_statfs; 88 static vfs_statvfs_t ext2_statvfs; 89 static vfs_sync_t ext2_sync; 90 static vfs_vget_t ext2_vget; 91 static vfs_fhtovp_t ext2_fhtovp; 92 static vfs_vptofh_t ext2_vptofh; 93 static vfs_checkexp_t ext2_check_export; 94 static vfs_mount_t ext2_mount; 95 96 MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); 97 static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); 98 99 static struct vfsops ext2fs_vfsops = { 100 .vfs_flags = 0, 101 .vfs_mount = ext2_mount, 102 .vfs_unmount = ext2_unmount, 103 .vfs_root = ext2_root, /* root inode via vget */ 104 .vfs_statfs = ext2_statfs, 105 .vfs_statvfs = ext2_statvfs, 106 .vfs_sync = ext2_sync, 107 .vfs_vget = ext2_vget, 108 .vfs_fhtovp = ext2_fhtovp, 109 .vfs_vptofh = ext2_vptofh, 110 .vfs_checkexp = ext2_check_export, 111 .vfs_init = ext2_init, 112 .vfs_uninit = ext2_uninit 113 }; 114 115 VFS_SET(ext2fs_vfsops, ext2fs, VFCF_MPSAFE); 116 MODULE_VERSION(ext2fs, 1); 117 118 static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, 119 int ronly); 120 static int ext2_compute_sb_data(struct vnode * devvp, 121 struct ext2fs * es, struct m_ext2fs * fs); 122 123 static int ext2fs_inode_hash_lock; 124 125 /* 126 * VFS Operations. 127 * 128 * mount system call 129 */ 130 static int 131 ext2_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 132 { 133 struct ext2_args args; 134 struct vnode *devvp; 135 struct ext2mount *ump = NULL; 136 struct m_ext2fs *fs; 137 struct nlookupdata nd; 138 mode_t accmode; 139 int error, flags; 140 size_t size; 141 142 if ((error = copyin(data, (caddr_t)&args, sizeof (struct ext2_args))) != 0) 143 return (error); 144 145 /* 146 * If updating, check whether changing from read-only to 147 * read/write; if there is no device name, that's all we do. 148 */ 149 if (mp->mnt_flag & MNT_UPDATE) { 150 ump = VFSTOEXT2(mp); 151 fs = ump->um_e2fs; 152 devvp = ump->um_devvp; 153 error = 0; 154 if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { 155 error = VFS_SYNC(mp, MNT_WAIT); 156 if (error) 157 return (error); 158 flags = WRITECLOSE; 159 if (mp->mnt_flag & MNT_FORCE) 160 flags |= FORCECLOSE; 161 if (vfs_busy(mp, LK_NOWAIT)) 162 return (EBUSY); 163 error = ext2_flushfiles(mp, flags); 164 vfs_unbusy(mp); 165 if (error == 0 && fs->e2fs_wasvalid && 166 ext2_cgupdate(ump, MNT_WAIT) == 0) { 167 fs->e2fs->e2fs_state = 168 htole16((le16toh(fs->e2fs->e2fs_state) | 169 E2FS_ISCLEAN)); 170 ext2_sbupdate(ump, MNT_WAIT); 171 } 172 fs->e2fs_ronly = 1; 173 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 174 VOP_OPEN(devvp, FREAD, FSCRED, NULL); 175 VOP_CLOSE(devvp, FREAD | FWRITE, NULL); 176 vn_unlock(devvp); 177 } 178 if (!error && (mp->mnt_flag & MNT_RELOAD)) 179 error = ext2_reload(mp); 180 if (error) 181 return (error); 182 devvp = ump->um_devvp; 183 if (fs->e2fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { 184 if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) 185 return (EPERM); 186 187 /* 188 * If upgrade to read-write by non-root, then verify 189 * that user has necessary permissions on the device. 190 */ 191 if (cred->cr_uid != 0) { 192 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 193 error = VOP_EACCESS(devvp, VREAD | VWRITE, cred); 194 if (error) { 195 vn_unlock(devvp); 196 return (error); 197 } 198 vn_unlock(devvp); 199 } 200 201 if ((le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN) == 0 || 202 (le16toh(fs->e2fs->e2fs_state) & E2FS_ERRORS)) { 203 if (mp->mnt_flag & MNT_FORCE) { 204 printf( 205 "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); 206 } else { 207 printf( 208 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", 209 fs->e2fs_fsmnt); 210 return (EPERM); 211 } 212 } 213 fs->e2fs->e2fs_state = 214 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 215 (void)ext2_cgupdate(ump, MNT_WAIT); 216 fs->e2fs_ronly = 0; 217 mp->mnt_flag &= ~MNT_RDONLY; 218 219 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 220 VOP_OPEN(devvp, FREAD | FWRITE, FSCRED, NULL); 221 VOP_CLOSE(devvp, FREAD, NULL); 222 vn_unlock(devvp); 223 } 224 if (args.fspec == NULL) { 225 /* 226 * Process export requests. 227 */ 228 return (vfs_export(mp, &ump->um_export, &args.export)); 229 } 230 } 231 232 /* 233 * Not an update, or updating the name: look up the name 234 * and verify that it refers to a sensible disk device. 235 */ 236 devvp = NULL; 237 error = nlookup_init(&nd, args.fspec, UIO_USERSPACE, NLC_FOLLOW); 238 if (error == 0) 239 error = nlookup(&nd); 240 if (error == 0) 241 error = cache_vref(&nd.nl_nch, nd.nl_cred, &devvp); 242 nlookup_done(&nd); 243 if (error) 244 return (error); 245 246 if (!vn_isdisk(devvp, &error)) { 247 vrele(devvp); 248 return (error); 249 } 250 251 /* 252 * If mount by non-root, then verify that user has necessary 253 * permissions on the device. 254 * 255 * XXXRW: VOP_ACCESS() enough? 256 */ 257 if (cred->cr_uid != 0) { 258 accmode = VREAD; 259 if ((mp->mnt_flag & MNT_RDONLY) == 0) 260 accmode |= VWRITE; 261 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 262 if ((error = VOP_EACCESS(devvp, accmode, cred)) != 0) { 263 vput(devvp); 264 return (error); 265 } 266 vn_unlock(devvp); 267 } 268 269 if ((mp->mnt_flag & MNT_UPDATE) == 0) { 270 error = ext2_mountfs(devvp, mp); 271 } else { 272 if (devvp != ump->um_devvp) 273 error = EINVAL; /* needs translation */ 274 else 275 vrele(devvp); 276 } 277 if (error) { 278 vrele(devvp); 279 return (error); 280 } 281 ump = VFSTOEXT2(mp); 282 fs = ump->um_e2fs; 283 284 /* 285 * Note that this strncpy() is ok because of a check at the start 286 * of ext2_mount(). 287 */ 288 copyinstr(path, fs->e2fs_fsmnt, sizeof(fs->e2fs_fsmnt) - 1, &size); 289 bzero(fs->e2fs_fsmnt + size, sizeof(fs->e2fs_fsmnt) - size); 290 copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 291 bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); 292 ext2_statfs(mp, &mp->mnt_stat, cred); 293 return (0); 294 } 295 296 static int 297 ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) 298 { 299 uint32_t i, mask; 300 301 if (le16toh(es->e2fs_magic) != E2FS_MAGIC) { 302 printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", 303 devtoname(dev), le16toh(es->e2fs_magic), E2FS_MAGIC); 304 return (1); 305 } 306 if (le32toh(es->e2fs_rev) > E2FS_REV0) { 307 mask = le32toh(es->e2fs_features_incompat) & ~(EXT2F_INCOMPAT_SUPP); 308 if (mask) { 309 printf("WARNING: mount of %s denied due to " 310 "unsupported optional features:\n", devtoname(dev)); 311 for (i = 0; 312 i < sizeof(incompat)/sizeof(struct ext2_feature); 313 i++) 314 if (mask & incompat[i].mask) 315 printf("%s ", incompat[i].name); 316 printf("\n"); 317 return (1); 318 } 319 mask = le32toh(es->e2fs_features_rocompat) & ~EXT2F_ROCOMPAT_SUPP; 320 if (!ronly && mask) { 321 printf("WARNING: R/W mount of %s denied due to " 322 "unsupported optional features:\n", devtoname(dev)); 323 for (i = 0; 324 i < sizeof(ro_compat)/sizeof(struct ext2_feature); 325 i++) 326 if (mask & ro_compat[i].mask) 327 printf("%s ", ro_compat[i].name); 328 printf("\n"); 329 return (1); 330 } 331 } 332 return (0); 333 } 334 335 static e4fs_daddr_t 336 ext2_cg_location(struct m_ext2fs *fs, int number) 337 { 338 int cg, descpb, logical_sb, has_super = 0; 339 340 /* 341 * Adjust logical superblock block number. 342 * Godmar thinks: if the blocksize is greater than 1024, then 343 * the superblock is logically part of block zero. 344 */ 345 logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1; 346 347 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 348 number < le32toh(fs->e2fs->e3fs_first_meta_bg)) 349 return (logical_sb + number + 1); 350 351 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) 352 descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); 353 else 354 descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 355 356 cg = descpb * number; 357 358 if (ext2_cg_has_sb(fs, cg)) 359 has_super = 1; 360 361 return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + 362 le32toh(fs->e2fs->e2fs_first_dblock)); 363 } 364 365 static int 366 ext2_cg_validate(struct m_ext2fs *fs) 367 { 368 uint64_t b_bitmap; 369 uint64_t i_bitmap; 370 uint64_t i_tables; 371 uint64_t first_block, last_block, last_cg_block; 372 struct ext2_gd *gd; 373 unsigned int i, cg_count; 374 375 first_block = le32toh(fs->e2fs->e2fs_first_dblock); 376 last_cg_block = ext2_cg_number_gdb(fs, 0); 377 cg_count = fs->e2fs_gcount; 378 379 for (i = 0; i < fs->e2fs_gcount; i++) { 380 gd = &fs->e2fs_gd[i]; 381 382 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 383 i == fs->e2fs_gcount - 1) { 384 last_block = fs->e2fs_bcount - 1; 385 } else { 386 last_block = first_block + 387 (EXT2_BLOCKS_PER_GROUP(fs) - 1); 388 } 389 390 if ((cg_count == fs->e2fs_gcount) && 391 !(le16toh(gd->ext4bgd_flags) & EXT2_BG_INODE_ZEROED)) 392 cg_count = i; 393 394 b_bitmap = e2fs_gd_get_b_bitmap(gd); 395 if (b_bitmap == 0) { 396 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 397 "block bitmap is zero", i); 398 return (EINVAL); 399 } 400 if (b_bitmap <= last_cg_block) { 401 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 402 "block bitmap overlaps gds", i); 403 return (EINVAL); 404 } 405 if (b_bitmap < first_block || b_bitmap > last_block) { 406 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 407 "block bitmap not in group", i); 408 return (EINVAL); 409 } 410 411 i_bitmap = e2fs_gd_get_i_bitmap(gd); 412 if (i_bitmap == 0) { 413 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 414 "inode bitmap is zero", i); 415 return (EINVAL); 416 } 417 if (i_bitmap <= last_cg_block) { 418 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 419 "inode bitmap overlaps gds", i); 420 return (EINVAL); 421 } 422 if (i_bitmap < first_block || i_bitmap > last_block) { 423 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 424 "inode bitmap not in group blk", i); 425 return (EINVAL); 426 } 427 428 i_tables = e2fs_gd_get_i_tables(gd); 429 if (i_tables == 0) { 430 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 431 "inode table is zero", i); 432 return (EINVAL); 433 } 434 if (i_tables <= last_cg_block) { 435 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 436 "inode tables overlaps gds", i); 437 return (EINVAL); 438 } 439 if (i_tables < first_block || 440 i_tables + fs->e2fs_itpg - 1 > last_block) { 441 SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, 442 "inode tables not in group blk", i); 443 return (EINVAL); 444 } 445 446 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) 447 first_block += EXT2_BLOCKS_PER_GROUP(fs); 448 } 449 450 return (0); 451 } 452 453 /* 454 * This computes the fields of the m_ext2fs structure from the 455 * data in the ext2fs structure read in. 456 */ 457 static int 458 ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, 459 struct m_ext2fs *fs) 460 { 461 struct buf *bp; 462 uint32_t e2fs_descpb, e2fs_gdbcount_alloc; 463 int i, j; 464 int g_count = 0; 465 int error; 466 467 /* Check checksum features */ 468 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && 469 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 470 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 471 "incorrect checksum features combination"); 472 return (EINVAL); 473 } 474 475 /* Precompute checksum seed for all metadata */ 476 ext2_sb_csum_set_seed(fs); 477 478 /* Verify sb csum if possible */ 479 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 480 error = ext2_sb_csum_verify(fs); 481 if (error) { 482 return (error); 483 } 484 } 485 486 /* Check for block size = 1K|2K|4K */ 487 if (le32toh(es->e2fs_log_bsize) > 2) { 488 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 489 "bad block size"); 490 return (EINVAL); 491 } 492 493 fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + le32toh(es->e2fs_log_bsize); 494 fs->e2fs_bsize = 1U << fs->e2fs_bshift; 495 fs->e2fs_fsbtodb = le32toh(es->e2fs_log_bsize) + 1; 496 fs->e2fs_qbmask = fs->e2fs_bsize - 1; 497 498 /* Check for fragment size */ 499 if (le32toh(es->e2fs_log_fsize) > 500 (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { 501 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 502 "invalid log cluster size"); 503 return (EINVAL); 504 } 505 506 fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << le32toh(es->e2fs_log_fsize); 507 if (fs->e2fs_fsize != fs->e2fs_bsize) { 508 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 509 "fragment size != block size"); 510 return (EINVAL); 511 } 512 513 fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; 514 515 /* Check reserved gdt blocks for future filesystem expansion */ 516 if (le16toh(es->e2fs_reserved_ngdb) > (fs->e2fs_bsize / 4)) { 517 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 518 "number of reserved GDT blocks too large"); 519 return (EINVAL); 520 } 521 522 if (le32toh(es->e2fs_rev) == E2FS_REV0) { 523 fs->e2fs_isize = E2FS_REV0_INODE_SIZE; 524 } else { 525 fs->e2fs_isize = le16toh(es->e2fs_inode_size); 526 527 /* 528 * Check first ino. 529 */ 530 if (le32toh(es->e2fs_first_ino) < EXT2_FIRSTINO) { 531 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 532 "invalid first ino"); 533 return (EINVAL); 534 } 535 536 /* 537 * Simple sanity check for superblock inode size value. 538 */ 539 if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || 540 EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || 541 (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { 542 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 543 "invalid inode size"); 544 return (EINVAL); 545 } 546 } 547 548 /* Check group descriptors */ 549 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && 550 le16toh(es->e3fs_desc_size) != E2FS_64BIT_GD_SIZE) { 551 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 552 "unsupported 64bit descriptor size"); 553 return (EINVAL); 554 } 555 556 fs->e2fs_bpg = le32toh(es->e2fs_bpg); 557 fs->e2fs_fpg = le32toh(es->e2fs_fpg); 558 if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { 559 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 560 "zero blocks/fragments per group"); 561 return (EINVAL); 562 } else if (fs->e2fs_bpg != fs->e2fs_fpg) { 563 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 564 "blocks per group not equal fragments per group"); 565 return (EINVAL); 566 } 567 568 if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { 569 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 570 "non-standard group size unsupported"); 571 return (EINVAL); 572 } 573 574 fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); 575 if (fs->e2fs_ipb == 0 || 576 fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { 577 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 578 "bad inodes per block size"); 579 return (EINVAL); 580 } 581 582 fs->e2fs_ipg = le32toh(es->e2fs_ipg); 583 if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { 584 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 585 "invalid inodes per group"); 586 return (EINVAL); 587 } 588 589 fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; 590 591 fs->e2fs_bcount = le32toh(es->e2fs_bcount); 592 fs->e2fs_rbcount = le32toh(es->e2fs_rbcount); 593 fs->e2fs_fbcount = le32toh(es->e2fs_fbcount); 594 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 595 fs->e2fs_bcount |= (uint64_t)(le32toh(es->e4fs_bcount_hi)) << 32; 596 fs->e2fs_rbcount |= (uint64_t)(le32toh(es->e4fs_rbcount_hi)) << 32; 597 fs->e2fs_fbcount |= (uint64_t)(le32toh(es->e4fs_fbcount_hi)) << 32; 598 } 599 if (fs->e2fs_rbcount > fs->e2fs_bcount || 600 fs->e2fs_fbcount > fs->e2fs_bcount) { 601 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 602 "invalid block count"); 603 return (EINVAL); 604 } 605 606 fs->e2fs_ficount = le32toh(es->e2fs_ficount); 607 if (fs->e2fs_ficount > le32toh(es->e2fs_icount)) { 608 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 609 "invalid number of free inodes"); 610 return (EINVAL); 611 } 612 613 if (le32toh(es->e2fs_first_dblock) >= fs->e2fs_bcount) { 614 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 615 "first data block out of range"); 616 return (EINVAL); 617 } 618 619 fs->e2fs_gcount = howmany(fs->e2fs_bcount - 620 le32toh(es->e2fs_first_dblock), EXT2_BLOCKS_PER_GROUP(fs)); 621 if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { 622 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 623 "groups count too large"); 624 return (EINVAL); 625 } 626 627 /* Check for extra isize in big inodes. */ 628 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && 629 EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { 630 SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, 631 "no space for extra inode timestamps"); 632 return (EINVAL); 633 } 634 635 /* s_resuid / s_resgid ? */ 636 637 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 638 e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; 639 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); 640 } else { 641 e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; 642 e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, 643 fs->e2fs_bsize / sizeof(struct ext2_gd)); 644 } 645 fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); 646 fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, 647 M_EXT2MNT, M_WAITOK | M_ZERO); 648 fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * 649 sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); 650 651 for (i = 0; i < fs->e2fs_gdbcount; i++) { 652 error = bread(devvp, fsbtodoff(fs, ext2_cg_location(fs, i)), 653 fs->e2fs_bsize, &bp); 654 if (error) { 655 /* 656 * fs->e2fs_gd and fs->e2fs_contigdirs 657 * will be freed later by the caller, 658 * because this function could be called from 659 * MNT_UPDATE path. 660 */ 661 brelse(bp); 662 return (error); 663 } 664 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 665 memcpy(&fs->e2fs_gd[ 666 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 667 bp->b_data, fs->e2fs_bsize); 668 } else { 669 for (j = 0; j < e2fs_descpb && 670 g_count < fs->e2fs_gcount; j++, g_count++) 671 memcpy(&fs->e2fs_gd[g_count], 672 bp->b_data + j * E2FS_REV0_GD_SIZE, 673 E2FS_REV0_GD_SIZE); 674 } 675 brelse(bp); 676 bp = NULL; 677 } 678 679 /* Validate cgs consistency */ 680 error = ext2_cg_validate(fs); 681 if (error) 682 return (error); 683 684 /* Verfy cgs csum */ 685 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 686 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 687 error = ext2_gd_csum_verify(fs, devvp->v_rdev); 688 if (error) 689 return (error); 690 } 691 /* Initialization for the ext2 Orlov allocator variant. */ 692 fs->e2fs_total_dir = 0; 693 for (i = 0; i < fs->e2fs_gcount; i++) 694 fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); 695 696 if (le32toh(es->e2fs_rev) == E2FS_REV0 || 697 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) 698 fs->e2fs_maxfilesize = 0x7fffffff; 699 else { 700 fs->e2fs_maxfilesize = 0xffffffffffff; 701 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) 702 fs->e2fs_maxfilesize = 0x7fffffffffffffff; 703 } 704 if (le32toh(es->e4fs_flags) & E2FS_UNSIGNED_HASH) { 705 fs->e2fs_uhash = 3; 706 } else if ((le32toh(es->e4fs_flags) & E2FS_SIGNED_HASH) == 0) { 707 #ifdef __CHAR_UNSIGNED__ 708 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_UNSIGNED_HASH); 709 fs->e2fs_uhash = 3; 710 #else 711 es->e4fs_flags = htole32(le32toh(es->e4fs_flags) | E2FS_SIGNED_HASH); 712 #endif 713 } 714 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 715 error = ext2_sb_csum_verify(fs); 716 717 return (error); 718 } 719 720 struct scaninfo { 721 int rescan; 722 int allerror; 723 int waitfor; 724 struct vnode *devvp; 725 struct m_ext2fs *fs; 726 }; 727 728 static int 729 ext2_reload_scan(struct mount *mp, struct vnode *vp, void *data) 730 { 731 struct scaninfo *info = data; 732 struct inode *ip; 733 struct buf *bp; 734 int error; 735 736 /* 737 * Try to recycle 738 */ 739 if (vrecycle(vp)) 740 return (0); 741 742 /* 743 * Step 1: invalidate all cached file data. 744 */ 745 if (vinvalbuf(vp, 0, 0, 0)) 746 panic("ext2_reload: dirty2"); 747 /* 748 * Step 2: re-read inode data for all active vnodes. 749 */ 750 ip = VTOI(vp); 751 error = bread(info->devvp, 752 fsbtodoff(info->fs, ino_to_fsba(info->fs, ip->i_number)), 753 (int)info->fs->e2fs_bsize, &bp); 754 if (error) { 755 brelse(bp); 756 return (error); 757 } 758 759 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 760 EXT2_INODE_SIZE(info->fs) * ino_to_fsbo(info->fs, ip->i_number)), 761 ip); 762 763 brelse(bp); 764 return (error); 765 } 766 767 /* 768 * Reload all incore data for a filesystem (used after running fsck on 769 * the root filesystem and finding things to fix). The filesystem must 770 * be mounted read-only. 771 * 772 * Things to do to update the mount: 773 * 1) invalidate all cached meta-data. 774 * 2) re-read superblock from disk. 775 * 3) invalidate all cluster summary information. 776 * 4) invalidate all inactive vnodes. 777 * 5) invalidate all cached file data. 778 * 6) re-read inode data for all active vnodes. 779 * XXX we are missing some steps, in particular # 3, this has to be reviewed. 780 */ 781 static int 782 ext2_reload(struct mount *mp) 783 { 784 struct vnode *devvp; 785 struct buf *bp; 786 struct ext2fs *es; 787 struct m_ext2fs *fs; 788 struct csum *sump; 789 struct scaninfo scaninfo; 790 int error, i; 791 int32_t *lp; 792 793 if ((mp->mnt_flag & MNT_RDONLY) == 0) 794 return (EINVAL); 795 /* 796 * Step 1: invalidate all cached meta-data. 797 */ 798 devvp = VFSTOEXT2(mp)->um_devvp; 799 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 800 if (vinvalbuf(devvp, 0, 0, 0) != 0) 801 panic("ext2_reload: dirty1"); 802 vn_unlock(devvp); 803 804 /* 805 * Step 2: re-read superblock from disk. 806 * constants have been adjusted for ext2 807 */ 808 if ((error = bread(devvp, SBOFF, SBSIZE, &bp)) != 0) { 809 brelse(bp); 810 return (error); 811 } 812 es = (struct ext2fs *)bp->b_data; 813 if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { 814 brelse(bp); 815 return (EIO); /* XXX needs translation */ 816 } 817 fs = VFSTOEXT2(mp)->um_e2fs; 818 bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); 819 820 if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { 821 brelse(bp); 822 return (error); 823 } 824 #ifdef UNKLAR 825 if (fs->fs_sbsize < SBSIZE) 826 bp->b_flags |= B_INVAL; 827 #endif 828 brelse(bp); 829 830 /* 831 * Step 3: invalidate all cluster summary information. 832 */ 833 if (fs->e2fs_contigsumsize > 0) { 834 lp = fs->e2fs_maxcluster; 835 sump = fs->e2fs_clustersum; 836 for (i = 0; i < fs->e2fs_gcount; i++, sump++) { 837 *lp++ = fs->e2fs_contigsumsize; 838 sump->cs_init = 0; 839 bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); 840 } 841 } 842 843 scaninfo.rescan = 1; 844 scaninfo.devvp = devvp; 845 scaninfo.fs = fs; 846 while (error == 0 && scaninfo.rescan) { 847 scaninfo.rescan = 0; 848 error = vmntvnodescan(mp, VMSC_GETVX, NULL, ext2_reload_scan, 849 &scaninfo); 850 } 851 return (error); 852 } 853 854 /* 855 * Common code for mount and mountroot. 856 */ 857 static int 858 ext2_mountfs(struct vnode *devvp, struct mount *mp) 859 { 860 struct ext2mount *ump; 861 struct buf *bp; 862 struct m_ext2fs *fs; 863 struct ext2fs *es; 864 struct cdev *dev = devvp->v_rdev; 865 struct csum *sump; 866 int error; 867 int ronly; 868 int i; 869 u_long size; 870 int32_t *lp; 871 int32_t e2fs_maxcontig; 872 873 /* 874 * Disallow multiple mounts of the same device. 875 * Disallow mounting of a device that is currently in use 876 * (except for root, which might share swap device for miniroot). 877 * Flush out any old buffers remaining from a previous use. 878 */ 879 if ((error = vfs_mountedon(devvp)) != 0) 880 return (error); 881 if (vcount(devvp) > 0) 882 return (EBUSY); 883 if ((error = vinvalbuf(devvp, V_SAVE, 0, 0)) != 0) 884 return (error); 885 #ifdef READONLY 886 /* Turn on this to force it to be read-only. */ 887 mp->mnt_flag |= MNT_RDONLY; 888 #endif 889 ronly = (mp->mnt_flag & MNT_RDONLY) != 0; 890 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 891 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD | FWRITE, FSCRED, NULL); 892 vn_unlock(devvp); 893 if (error) 894 return (error); 895 896 if (devvp->v_rdev->si_iosize_max != 0) 897 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; 898 if (mp->mnt_iosize_max > MAXPHYS) 899 mp->mnt_iosize_max = MAXPHYS; 900 901 bp = NULL; 902 ump = NULL; 903 if ((error = bread(devvp, SBOFF, SBSIZE, &bp)) != 0) 904 goto out; 905 es = (struct ext2fs *)bp->b_data; 906 if (ext2_check_sb_compat(es, dev, ronly) != 0) { 907 error = EINVAL; /* XXX needs translation */ 908 goto out; 909 } 910 if ((le16toh(es->e2fs_state) & E2FS_ISCLEAN) == 0 || 911 (le16toh(es->e2fs_state) & E2FS_ERRORS)) { 912 if (ronly || (mp->mnt_flag & MNT_FORCE)) { 913 printf( 914 "WARNING: Filesystem was not properly dismounted\n"); 915 } else { 916 printf( 917 "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); 918 error = EPERM; 919 goto out; 920 } 921 } 922 ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); 923 924 /* 925 * I don't know whether this is the right strategy. Note that 926 * we dynamically allocate both an m_ext2fs and an ext2fs 927 * while Linux keeps the super block in a locked buffer. 928 */ 929 ump->um_e2fs = malloc(sizeof(struct m_ext2fs), 930 M_EXT2MNT, M_WAITOK | M_ZERO); 931 ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), 932 M_EXT2MNT, M_WAITOK); 933 mtx_init(EXT2_MTX(ump), "EXT2FS Lock"); 934 bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); 935 if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) 936 goto out; 937 938 /* 939 * Calculate the maximum contiguous blocks and size of cluster summary 940 * array. In FFS this is done by newfs; however, the superblock 941 * in ext2fs doesn't have these variables, so we can calculate 942 * them here. 943 */ 944 e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize); 945 ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); 946 if (ump->um_e2fs->e2fs_contigsumsize > 0) { 947 size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); 948 ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); 949 size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); 950 ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); 951 lp = ump->um_e2fs->e2fs_maxcluster; 952 sump = ump->um_e2fs->e2fs_clustersum; 953 for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { 954 *lp++ = ump->um_e2fs->e2fs_contigsumsize; 955 sump->cs_init = 0; 956 sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * 957 sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); 958 } 959 } 960 961 brelse(bp); 962 bp = NULL; 963 fs = ump->um_e2fs; 964 fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ 965 966 /* 967 * If the fs is not mounted read-only, make sure the super block is 968 * always written back on a sync(). 969 */ 970 fs->e2fs_wasvalid = le16toh(fs->e2fs->e2fs_state) & E2FS_ISCLEAN ? 1 : 0; 971 if (ronly == 0) { 972 fs->e2fs_fmod = 1; /* mark it modified and set fs invalid */ 973 fs->e2fs->e2fs_state = 974 htole16(le16toh(fs->e2fs->e2fs_state) & ~E2FS_ISCLEAN); 975 } 976 mp->mnt_data = (qaddr_t)ump; 977 mp->mnt_stat.f_fsid.val[0] = devid_from_dev(dev); 978 mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; 979 mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; 980 mp->mnt_flag |= MNT_LOCAL; 981 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 982 ump->um_mountp = mp; 983 ump->um_dev = dev; 984 ump->um_devvp = devvp; 985 986 /* 987 * Setting those two parameters allowed us to use 988 * ufs_bmap w/o changse! 989 */ 990 ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); 991 ump->um_bptrtodb = le32toh(fs->e2fs->e2fs_log_bsize) + 1; 992 ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); 993 dev->si_mountpoint = mp; 994 995 vfs_add_vnodeops(mp, &ext2_vnodeops, &mp->mnt_vn_norm_ops); 996 vfs_add_vnodeops(mp, &ext2_specops, &mp->mnt_vn_spec_ops); 997 vfs_add_vnodeops(mp, &ext2_fifoops, &mp->mnt_vn_fifo_ops); 998 999 if (ronly == 0) 1000 ext2_sbupdate(ump, MNT_WAIT); 1001 return (0); 1002 out: 1003 if (bp) 1004 brelse(bp); 1005 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); 1006 VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1007 vn_unlock(devvp); 1008 if (ump) { 1009 mtx_uninit(EXT2_MTX(ump)); 1010 free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); 1011 free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); 1012 free(ump->um_e2fs->e2fs, M_EXT2MNT); 1013 free(ump->um_e2fs, M_EXT2MNT); 1014 free(ump, M_EXT2MNT); 1015 mp->mnt_data = NULL; 1016 } 1017 return (error); 1018 } 1019 1020 /* 1021 * Unmount system call. 1022 */ 1023 static int 1024 ext2_unmount(struct mount *mp, int mntflags) 1025 { 1026 struct ext2mount *ump; 1027 struct m_ext2fs *fs; 1028 struct csum *sump; 1029 int error, flags, i, ronly; 1030 1031 flags = 0; 1032 if (mntflags & MNT_FORCE) { 1033 if (mp->mnt_flag & MNT_ROOTFS) 1034 return (EINVAL); 1035 flags |= FORCECLOSE; 1036 } 1037 if ((error = ext2_flushfiles(mp, flags)) != 0) 1038 return (error); 1039 ump = VFSTOEXT2(mp); 1040 fs = ump->um_e2fs; 1041 ronly = fs->e2fs_ronly; 1042 if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { 1043 if (fs->e2fs_wasvalid) 1044 fs->e2fs->e2fs_state = 1045 htole16(le16toh(fs->e2fs->e2fs_state) | E2FS_ISCLEAN); 1046 ext2_sbupdate(ump, MNT_WAIT); 1047 } 1048 1049 ump->um_devvp->v_rdev->si_mountpoint = NULL; 1050 1051 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1052 error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD | FWRITE, NULL); 1053 vn_unlock(ump->um_devvp); 1054 1055 vrele(ump->um_devvp); 1056 sump = fs->e2fs_clustersum; 1057 for (i = 0; i < fs->e2fs_gcount; i++, sump++) 1058 free(sump->cs_sum, M_EXT2MNT); 1059 free(fs->e2fs_clustersum, M_EXT2MNT); 1060 free(fs->e2fs_maxcluster, M_EXT2MNT); 1061 free(fs->e2fs_gd, M_EXT2MNT); 1062 free(fs->e2fs_contigdirs, M_EXT2MNT); 1063 free(fs->e2fs, M_EXT2MNT); 1064 free(fs, M_EXT2MNT); 1065 free(ump, M_EXT2MNT); 1066 mp->mnt_data = NULL; 1067 mp->mnt_flag &= ~MNT_LOCAL; 1068 return (error); 1069 } 1070 1071 /* 1072 * Flush out all the files in a filesystem. 1073 */ 1074 static int 1075 ext2_flushfiles(struct mount *mp, int flags) 1076 { 1077 int error; 1078 1079 error = vflush(mp, 0, flags); 1080 return (error); 1081 } 1082 1083 /* 1084 * Get filesystem statistics. 1085 */ 1086 static int 1087 ext2_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 1088 { 1089 struct ext2mount *ump; 1090 struct m_ext2fs *fs; 1091 uint32_t overhead, overhead_per_group, ngdb; 1092 int i, ngroups; 1093 1094 ump = VFSTOEXT2(mp); 1095 fs = ump->um_e2fs; 1096 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1097 panic("ext2_statfs"); 1098 1099 /* 1100 * Compute the overhead (FS structures) 1101 */ 1102 overhead_per_group = 1103 1 /* block bitmap */ + 1104 1 /* inode bitmap */ + 1105 fs->e2fs_itpg; 1106 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1107 fs->e2fs_gcount * overhead_per_group; 1108 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1109 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1110 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1111 if (ext2_cg_has_sb(fs, i)) 1112 ngroups++; 1113 } 1114 } else { 1115 ngroups = fs->e2fs_gcount; 1116 } 1117 ngdb = fs->e2fs_gdbcount; 1118 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1119 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1120 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1121 overhead += ngroups * (1 /* superblock */ + ngdb); 1122 1123 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1124 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1125 sbp->f_iosize = EXT2_BLOCK_SIZE(fs); 1126 sbp->f_blocks = fs->e2fs_bcount - overhead; 1127 sbp->f_bfree = fs->e2fs_fbcount; 1128 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1129 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1130 sbp->f_ffree = fs->e2fs_ficount; 1131 if (sbp != &mp->mnt_stat) { 1132 bcopy((caddr_t)mp->mnt_stat.f_mntfromname, 1133 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); 1134 } 1135 return (0); 1136 } 1137 1138 static int 1139 ext2_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 1140 { 1141 struct ext2mount *ump; 1142 struct m_ext2fs *fs; 1143 uint32_t overhead, overhead_per_group, ngdb; 1144 int i, ngroups; 1145 1146 ump = VFSTOEXT2(mp); 1147 fs = ump->um_e2fs; 1148 if (le16toh(fs->e2fs->e2fs_magic) != E2FS_MAGIC) 1149 panic("ext2_statfs"); 1150 1151 /* 1152 * Compute the overhead (FS structures) 1153 */ 1154 overhead_per_group = 1155 1 /* block bitmap */ + 1156 1 /* inode bitmap */ + 1157 fs->e2fs_itpg; 1158 overhead = le32toh(fs->e2fs->e2fs_first_dblock) + 1159 fs->e2fs_gcount * overhead_per_group; 1160 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1161 le32toh(fs->e2fs->e2fs_features_rocompat) & EXT2F_ROCOMPAT_SPARSESUPER) { 1162 for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { 1163 if (ext2_cg_has_sb(fs, i)) 1164 ngroups++; 1165 } 1166 } else { 1167 ngroups = fs->e2fs_gcount; 1168 } 1169 ngdb = fs->e2fs_gdbcount; 1170 if (le32toh(fs->e2fs->e2fs_rev) > E2FS_REV0 && 1171 le32toh(fs->e2fs->e2fs_features_compat) & EXT2F_COMPAT_RESIZE) 1172 ngdb += le16toh(fs->e2fs->e2fs_reserved_ngdb); 1173 overhead += ngroups * (1 /* superblock */ + ngdb); 1174 1175 sbp->f_type = mp->mnt_vfc->vfc_typenum; 1176 sbp->f_bsize = EXT2_FRAG_SIZE(fs); 1177 sbp->f_frsize = EXT2_BLOCK_SIZE(fs); 1178 sbp->f_blocks = fs->e2fs_bcount - overhead; 1179 sbp->f_bfree = fs->e2fs_fbcount; 1180 sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; 1181 sbp->f_files = le32toh(fs->e2fs->e2fs_icount); 1182 sbp->f_ffree = fs->e2fs_ficount; 1183 return (0); 1184 } 1185 1186 static int 1187 ext2_sync_scan(struct mount *mp, struct vnode *vp, void *data) 1188 { 1189 struct scaninfo *info = data; 1190 struct inode *ip; 1191 int error; 1192 1193 ip = VTOI(vp); 1194 if (vp->v_type == VNON || 1195 ((ip->i_flag & 1196 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && 1197 (RB_EMPTY(&vp->v_rbdirty_tree) || (info->waitfor & MNT_LAZY)))) { 1198 return (0); 1199 } 1200 if ((error = VOP_FSYNC(vp, info->waitfor, 0)) != 0) 1201 info->allerror = error; 1202 return (0); 1203 } 1204 1205 /* 1206 * Go through the disk queues to initiate sandbagged IO; 1207 * go through the inodes to write those that have been modified; 1208 * initiate the writing of the super block if it has been modified. 1209 * 1210 * Note: we are always called with the filesystem marked `MPBUSY'. 1211 */ 1212 static int 1213 ext2_sync(struct mount *mp, int waitfor) 1214 { 1215 struct ext2mount *ump = VFSTOEXT2(mp); 1216 struct m_ext2fs *fs; 1217 struct scaninfo scaninfo; 1218 int error; 1219 1220 fs = ump->um_e2fs; 1221 if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ 1222 panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); 1223 } 1224 1225 /* 1226 * Write back each (modified) inode. 1227 */ 1228 scaninfo.allerror = 0; 1229 scaninfo.rescan = 1; 1230 scaninfo.waitfor = waitfor; 1231 while (scaninfo.rescan) { 1232 scaninfo.rescan = 0; 1233 vmntvnodescan(mp, VMSC_GETVP | VMSC_NOWAIT, 1234 NULL, ext2_sync_scan, &scaninfo); 1235 } 1236 1237 /* 1238 * Force stale filesystem control information to be flushed. 1239 */ 1240 if ((waitfor & MNT_LAZY) == 0) { 1241 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); 1242 if ((error = VOP_FSYNC(ump->um_devvp, waitfor, 0)) != 0) 1243 scaninfo.allerror = error; 1244 vn_unlock(ump->um_devvp); 1245 } 1246 1247 /* 1248 * Write back modified superblock. 1249 */ 1250 if (fs->e2fs_fmod != 0) { 1251 fs->e2fs_fmod = 0; 1252 fs->e2fs->e2fs_wtime = htole32(time_second); 1253 if ((error = ext2_cgupdate(ump, waitfor)) != 0) 1254 scaninfo.allerror = error; 1255 } 1256 return (scaninfo.allerror); 1257 } 1258 1259 int 1260 ext2_alloc_vnode(struct mount *mp, ino_t ino, struct vnode **vpp) 1261 { 1262 struct ext2mount *ump; 1263 struct vnode *vp; 1264 struct inode *ip; 1265 int error; 1266 1267 ump = VFSTOEXT2(mp); 1268 /* 1269 * Lock out the creation of new entries in the FFS hash table in 1270 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate 1271 * may occur! 1272 */ 1273 if (ext2fs_inode_hash_lock) { 1274 while (ext2fs_inode_hash_lock) { 1275 ext2fs_inode_hash_lock = -1; 1276 tsleep(&ext2fs_inode_hash_lock, 0, "e2vget", 0); 1277 } 1278 return (-1); 1279 } 1280 ext2fs_inode_hash_lock = 1; 1281 1282 ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); 1283 1284 /* Allocate a new vnode/inode. */ 1285 if ((error = getnewvnode(VT_EXT2FS, mp, &vp, VLKTIMEOUT, 1286 LK_CANRECURSE)) != 0) { 1287 if (ext2fs_inode_hash_lock < 0) 1288 wakeup(&ext2fs_inode_hash_lock); 1289 ext2fs_inode_hash_lock = 0; 1290 *vpp = NULL; 1291 free(ip, M_EXT2NODE); 1292 return (error); 1293 } 1294 //lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 1295 vp->v_data = ip; 1296 ip->i_vnode = vp; 1297 ip->i_e2fs = ump->um_e2fs; 1298 ip->i_dev = ump->um_dev; 1299 ip->i_ump = ump; 1300 ip->i_number = ino; 1301 ip->i_block_group = ino_to_cg(ip->i_e2fs, ino); 1302 ip->i_next_alloc_block = 0; 1303 ip->i_next_alloc_goal = 0; 1304 1305 /* 1306 * Put it onto its hash chain. Since our vnode is locked, other 1307 * requests for this inode will block if they arrive while we are 1308 * sleeping waiting for old data structures to be purged or for the 1309 * contents of the disk portion of this inode to be read. 1310 */ 1311 if (ext2_ihashins(ip)) { 1312 printf("ext2_alloc_vnode: ihashins collision, retrying inode %ld\n", 1313 (long)ip->i_number); 1314 *vpp = NULL; 1315 vp->v_type = VBAD; 1316 vx_put(vp); 1317 free(ip, M_EXT2NODE); 1318 return (-1); 1319 } 1320 1321 if (ext2fs_inode_hash_lock < 0) 1322 wakeup(&ext2fs_inode_hash_lock); 1323 ext2fs_inode_hash_lock = 0; 1324 *vpp = vp; 1325 1326 return (0); 1327 } 1328 1329 /* 1330 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it 1331 * in from disk. If it is in core, wait for the lock bit to clear, then 1332 * return the inode locked. Detection and handling of mount points must be 1333 * done by the calling routine. 1334 */ 1335 static int 1336 ext2_vget(struct mount *mp, struct vnode *dvp, ino_t ino, struct vnode **vpp) 1337 { 1338 struct m_ext2fs *fs; 1339 struct inode *ip; 1340 struct ext2mount *ump; 1341 struct buf *bp; 1342 struct vnode *vp; 1343 unsigned int i, used_blocks; 1344 int error; 1345 1346 ump = VFSTOEXT2(mp); 1347 restart: 1348 if ((*vpp = ext2_ihashget(ump->um_dev, ino)) != NULL) 1349 return (0); 1350 if (ext2_alloc_vnode(mp, ino, &vp) == -1) 1351 goto restart; 1352 ip = VTOI(vp); 1353 fs = ip->i_e2fs; 1354 1355 /* Read in the disk contents for the inode, copy into the inode. */ 1356 if ((error = bread(ump->um_devvp, fsbtodoff(fs, ino_to_fsba(fs, ino)), 1357 (int)fs->e2fs_bsize, &bp)) != 0) { 1358 /* 1359 * The inode does not contain anything useful, so it would 1360 * be misleading to leave it on its hash chain. With mode 1361 * still zero, it will be unlinked and returned to the free 1362 * list by vput(). 1363 */ 1364 vp->v_type = VBAD; 1365 brelse(bp); 1366 vx_put(vp); 1367 *vpp = NULL; 1368 return (error); 1369 } 1370 /* convert ext2 inode to dinode */ 1371 error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + 1372 EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); 1373 if (error) { 1374 brelse(bp); 1375 vx_put(vp); 1376 *vpp = NULL; 1377 return (error); 1378 } 1379 1380 /* 1381 * Now we want to make sure that block pointers for unused 1382 * blocks are zeroed out - ext2_balloc depends on this 1383 * although for regular files and directories only 1384 * 1385 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed 1386 * out because we could corrupt the extent tree. 1387 */ 1388 if (!(ip->i_flag & IN_E4EXTENTS) && 1389 (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { 1390 used_blocks = howmany(ip->i_size, fs->e2fs_bsize); 1391 for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) 1392 ip->i_db[i] = 0; 1393 } 1394 #ifdef EXT2FS_PRINT_EXTENTS 1395 ext2_print_inode(ip); 1396 ext4_ext_print_extent_tree_status(ip); 1397 #endif 1398 bqrelse(bp); 1399 1400 /* 1401 * Initialize the vnode from the inode, check for aliases. 1402 * Note that the underlying vnode may have changed. 1403 */ 1404 if ((error = ext2_vinit(mp, &vp)) != 0) { 1405 vx_put(vp); 1406 *vpp = NULL; 1407 return (error); 1408 } 1409 1410 /* 1411 * Finish inode initialization now that aliasing has been resolved. 1412 */ 1413 vref(ip->i_devvp); 1414 /* 1415 * Set up a generation number for this inode if it does not 1416 * already have one. This should only happen on old filesystems. 1417 */ 1418 if (ip->i_gen == 0) { 1419 ip->i_gen = krandom() / 2 + 1; 1420 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 1421 ip->i_flag |= IN_MODIFIED; 1422 } 1423 /* 1424 * Return the locked and refd vnode. 1425 */ 1426 vx_downgrade(vp); /* downgrade VX lock to VN lock */ 1427 *vpp = vp; 1428 1429 return (0); 1430 } 1431 1432 /* 1433 * File handle to vnode 1434 * 1435 * Have to be really careful about stale file handles: 1436 * - check that the inode number is valid 1437 * - call ext2_vget() to get the locked inode 1438 * - check for an unallocated inode (i_mode == 0) 1439 * - check that the given client host has export rights and return 1440 * those rights via. exflagsp and credanonp 1441 */ 1442 static int 1443 ext2_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 1444 struct vnode **vpp) 1445 { 1446 struct inode *ip; 1447 struct ufid *ufhp; 1448 struct vnode *nvp; 1449 struct m_ext2fs *fs; 1450 int error; 1451 1452 ufhp = (struct ufid *)fhp; 1453 fs = VFSTOEXT2(mp)->um_e2fs; 1454 if (ufhp->ufid_ino < EXT2_ROOTINO || 1455 ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs_ipg) 1456 return (ESTALE); 1457 1458 error = VFS_VGET(mp, NULL, ufhp->ufid_ino, &nvp); 1459 if (error) { 1460 *vpp = NULLVP; 1461 return (error); 1462 } 1463 ip = VTOI(nvp); 1464 if (ip->i_mode == 0 || 1465 ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { 1466 vput(nvp); 1467 *vpp = NULLVP; 1468 return (ESTALE); 1469 } 1470 *vpp = nvp; 1471 return (0); 1472 } 1473 1474 /* 1475 * Vnode pointer to File handle 1476 */ 1477 /* ARGSUSED */ 1478 static int 1479 ext2_vptofh(struct vnode *vp, struct fid *fhp) 1480 { 1481 struct inode *ip; 1482 struct ufid *ufhp; 1483 1484 ip = VTOI(vp); 1485 ufhp = (struct ufid *)fhp; 1486 ufhp->ufid_len = sizeof(struct ufid); 1487 ufhp->ufid_ino = ip->i_number; 1488 ufhp->ufid_gen = ip->i_gen; 1489 return (0); 1490 } 1491 1492 /* 1493 * This is the generic part of fhtovp called after the underlying 1494 * filesystem has validated the file handle. 1495 * 1496 * Verify that a host should have access to a filesystem. 1497 */ 1498 static int 1499 ext2_check_export(struct mount *mp, struct sockaddr *nam, int *exflagsp, 1500 struct ucred **credanonp) 1501 { 1502 struct netcred *np; 1503 struct ext2mount *ump; 1504 1505 ump = VFSTOEXT2(mp); 1506 /* 1507 * Get the export permission structure for this <mp, client> tuple. 1508 */ 1509 np = vfs_export_lookup(mp, &ump->um_export, nam); 1510 if (np == NULL) 1511 return (EACCES); 1512 1513 *exflagsp = np->netc_exflags; 1514 *credanonp = &np->netc_anon; 1515 return (0); 1516 } 1517 1518 /* 1519 * Write a superblock and associated information back to disk. 1520 */ 1521 static int 1522 ext2_sbupdate(struct ext2mount *mp, int waitfor) 1523 { 1524 struct m_ext2fs *fs = mp->um_e2fs; 1525 struct ext2fs *es = fs->e2fs; 1526 struct buf *bp; 1527 int error = 0; 1528 1529 es->e2fs_bcount = htole32(fs->e2fs_bcount & 0xffffffff); 1530 es->e2fs_rbcount = htole32(fs->e2fs_rbcount & 0xffffffff); 1531 es->e2fs_fbcount = htole32(fs->e2fs_fbcount & 0xffffffff); 1532 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1533 es->e4fs_bcount_hi = htole32(fs->e2fs_bcount >> 32); 1534 es->e4fs_rbcount_hi = htole32(fs->e2fs_rbcount >> 32); 1535 es->e4fs_fbcount_hi = htole32(fs->e2fs_fbcount >> 32); 1536 } 1537 1538 es->e2fs_ficount = htole32(fs->e2fs_ficount); 1539 1540 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1541 ext2_sb_csum_set(fs); 1542 1543 bp = getblk(mp->um_devvp, SBOFF, SBSIZE, 0, 0); 1544 bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); 1545 if (waitfor == MNT_WAIT) 1546 error = bwrite(bp); 1547 else 1548 bawrite(bp); 1549 1550 /* 1551 * The buffers for group descriptors, inode bitmaps and block bitmaps 1552 * are not busy at this point and are (hopefully) written by the 1553 * usual sync mechanism. No need to write them here. 1554 */ 1555 return (error); 1556 } 1557 1558 static int 1559 ext2_cgupdate(struct ext2mount *mp, int waitfor) 1560 { 1561 struct m_ext2fs *fs = mp->um_e2fs; 1562 struct buf *bp; 1563 int i, j, g_count = 0, error = 0, allerror = 0; 1564 1565 allerror = ext2_sbupdate(mp, waitfor); 1566 1567 /* Update gd csums */ 1568 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1569 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) 1570 ext2_gd_csum_set(fs); 1571 1572 for (i = 0; i < fs->e2fs_gdbcount; i++) { 1573 bp = getblk(mp->um_devvp, fsbtodoff(fs, 1574 ext2_cg_location(fs, i)), 1575 fs->e2fs_bsize, 0, 0); 1576 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { 1577 memcpy(bp->b_data, &fs->e2fs_gd[ 1578 i * fs->e2fs_bsize / sizeof(struct ext2_gd)], 1579 fs->e2fs_bsize); 1580 } else { 1581 for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && 1582 g_count < fs->e2fs_gcount; j++, g_count++) 1583 memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, 1584 &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); 1585 } 1586 if (waitfor == MNT_WAIT) 1587 error = bwrite(bp); 1588 else 1589 bawrite(bp); 1590 } 1591 1592 if (!allerror && error) 1593 allerror = error; 1594 return (allerror); 1595 } 1596 1597 /* 1598 * Return the root of a filesystem. 1599 */ 1600 static int 1601 ext2_root(struct mount *mp, struct vnode **vpp) 1602 { 1603 struct vnode *nvp; 1604 int error; 1605 1606 error = VFS_VGET(mp, NULL, (ino_t)EXT2_ROOTINO, &nvp); 1607 if (error) 1608 return (error); 1609 *vpp = nvp; 1610 return (0); 1611 } 1612 1613 /* 1614 * Initialize ext2 filesystems, done only once. 1615 */ 1616 static int 1617 ext2_init(struct vfsconf *vfsp) 1618 { 1619 static int done; 1620 1621 if (done) 1622 return (0); 1623 done = 1; 1624 ext2_ihashinit(); 1625 1626 return (0); 1627 } 1628 1629 static int 1630 ext2_uninit(struct vfsconf *vfsp) 1631 { 1632 1633 ext2_ihashuninit(); 1634 1635 return (0); 1636 } 1637