1 /*- 2 * modified for Lites 1.1 3 * 4 * Aug 1995, Godmar Back (gback@cs.utah.edu) 5 * University of Utah, Department of Computer Science 6 */ 7 /*- 8 * SPDX-License-Identifier: BSD-3-Clause 9 * 10 * Copyright (c) 1982, 1986, 1989, 1993 11 * The Regents of the University of California. All rights reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 38 * $FreeBSD$ 39 */ 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/conf.h> 44 #include <sys/vnode.h> 45 #include <sys/stat.h> 46 #include <sys/mount.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 #include <sys/buf2.h> 50 #include <sys/endian.h> 51 #include <sys/malloc.h> 52 #include <sys/mutex2.h> 53 54 #include <vfs/ext2fs/fs.h> 55 #include <vfs/ext2fs/inode.h> 56 #include <vfs/ext2fs/ext2_mount.h> 57 #include <vfs/ext2fs/ext2fs.h> 58 #include <vfs/ext2fs/ext2_extern.h> 59 60 SDT_PROVIDER_DEFINE(ext2fs); 61 /* 62 * ext2fs trace probe: 63 * arg0: verbosity. Higher numbers give more verbose messages 64 * arg1: Textual message 65 */ 66 SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*"); 67 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc, 68 "ino_t", "e2fs_lbn_t", "e2fs_lbn_t"); 69 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t"); 70 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t"); 71 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int"); 72 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, 73 "int", "daddr_t", "char*"); 74 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t"); 75 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t"); 76 77 static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); 78 static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); 79 static u_long ext2_dirpref(struct inode *); 80 static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int, 81 daddr_t (*)(struct inode *, int, daddr_t, 82 int)); 83 static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); 84 static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); 85 86 /* 87 * Allocate a block in the filesystem. 88 * 89 * A preference may be optionally specified. If a preference is given 90 * the following hierarchy is used to allocate a block: 91 * 1) allocate the requested block. 92 * 2) allocate a rotationally optimal block in the same cylinder. 93 * 3) allocate a block in the same cylinder group. 94 * 4) quadradically rehash into other cylinder groups, until an 95 * available block is located. 96 * If no block preference is given the following hierarchy is used 97 * to allocate a block: 98 * 1) allocate a block in the cylinder group that contains the 99 * inode for the file. 100 * 2) quadradically rehash into other cylinder groups, until an 101 * available block is located. 102 */ 103 int 104 ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size, 105 struct ucred *cred, e4fs_daddr_t *bnp) 106 { 107 struct m_ext2fs *fs; 108 struct ext2mount *ump; 109 e4fs_daddr_t bno; 110 int cg; 111 112 *bnp = 0; 113 fs = ip->i_e2fs; 114 ump = ip->i_ump; 115 mtx_assert(EXT2_MTX(ump), MA_OWNED); 116 #ifdef INVARIANTS 117 if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { 118 printf("bsize = %lu, size = %d, fs = %s\n", 119 (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); 120 panic("ext2_alloc: bad size"); 121 } 122 if (cred == NOCRED) 123 panic("ext2_alloc: missing credential"); 124 #endif /* INVARIANTS */ 125 if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0) 126 goto nospace; 127 if (cred->cr_uid != 0 && 128 fs->e2fs_fbcount < fs->e2fs_rbcount) 129 goto nospace; 130 if (bpref >= fs->e2fs_bcount) 131 bpref = 0; 132 if (bpref == 0) 133 cg = ino_to_cg(fs, ip->i_number); 134 else 135 cg = dtog(fs, bpref); 136 bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 137 ext2_alloccg); 138 if (bno > 0) { 139 /* set next_alloc fields as done in block_getblk */ 140 ip->i_next_alloc_block = lbn; 141 ip->i_next_alloc_goal = bno; 142 143 ip->i_blocks += btodb(fs->e2fs_bsize); 144 ip->i_flag |= IN_CHANGE | IN_UPDATE; 145 *bnp = bno; 146 return (0); 147 } 148 nospace: 149 EXT2_UNLOCK(ump); 150 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block"); 151 return (ENOSPC); 152 } 153 154 /* 155 * Allocate EA's block for inode. 156 */ 157 e4fs_daddr_t 158 ext2_alloc_meta(struct inode *ip) 159 { 160 struct m_ext2fs *fs; 161 daddr_t blk; 162 163 fs = ip->i_e2fs; 164 165 EXT2_LOCK(ip->i_ump); 166 blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize, 167 ext2_alloccg); 168 if (0 == blk) { 169 EXT2_UNLOCK(ip->i_ump); 170 SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block"); 171 } 172 173 return (blk); 174 } 175 176 /* 177 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 178 * 179 * The vnode and an array of buffer pointers for a range of sequential 180 * logical blocks to be made contiguous is given. The allocator attempts 181 * to find a range of sequential blocks starting as close as possible to 182 * an fs_rotdelay offset from the end of the allocation for the logical 183 * block immediately preceding the current range. If successful, the 184 * physical block numbers in the buffer pointers and in the inode are 185 * changed to reflect the new allocation. If unsuccessful, the allocation 186 * is left unchanged. The success in doing the reallocation is returned. 187 * Note that the error return is not reflected back to the user. Rather 188 * the previous block allocation will be used. 189 */ 190 191 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem"); 192 193 static int doasyncfree = 1; 194 195 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, 196 "Use asychronous writes to update block pointers when freeing blocks"); 197 198 static int doreallocblks = 0; 199 200 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 201 202 int 203 ext2_reallocblks(struct vop_reallocblks_args *ap) 204 { 205 struct m_ext2fs *fs; 206 struct inode *ip; 207 struct vnode *vp; 208 struct buf *sbp, *ebp; 209 uint32_t *bap, *sbap, *ebap; 210 struct ext2mount *ump; 211 struct cluster_save *buflist; 212 struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp; 213 e2fs_lbn_t start_lbn, end_lbn; 214 int soff; 215 e2fs_daddr_t newblk, blkno; 216 int i, len, start_lvl, end_lvl, pref, ssize; 217 218 if (doreallocblks == 0) 219 return (ENOSPC); 220 221 vp = ap->a_vp; 222 ip = VTOI(vp); 223 fs = ip->i_e2fs; 224 ump = ip->i_ump; 225 226 if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) 227 return (ENOSPC); 228 229 buflist = ap->a_buflist; 230 len = buflist->bs_nchildren; 231 start_lbn = lblkno(fs, buflist->bs_children[0]->b_loffset); 232 end_lbn = start_lbn + len - 1; 233 #ifdef INVARIANTS 234 for (i = 1; i < len; i++) 235 if (buflist->bs_children[i]->b_loffset != lblktodoff(fs, start_lbn + i)) 236 panic("ext2_reallocblks: non-cluster"); 237 #endif 238 /* 239 * If the cluster crosses the boundary for the first indirect 240 * block, leave space for the indirect block. Indirect blocks 241 * are initially laid out in a position after the last direct 242 * block. Block reallocation would usually destroy locality by 243 * moving the indirect block out of the way to make room for 244 * data blocks if we didn't compensate here. We should also do 245 * this for other indirect block boundaries, but it is only 246 * important for the first one. 247 */ 248 if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR) 249 return (ENOSPC); 250 /* 251 * If the latest allocation is in a new cylinder group, assume that 252 * the filesystem has decided to move and do not force it back to 253 * the previous cylinder group. 254 */ 255 if (dtog(fs, dofftofsb(fs, buflist->bs_children[0]->b_bio2.bio_offset)) != 256 dtog(fs, dofftofsb(fs, buflist->bs_children[len - 1]->b_bio2.bio_offset))) 257 return (ENOSPC); 258 if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || 259 ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) 260 return (ENOSPC); 261 /* 262 * Get the starting offset and block map for the first block. 263 */ 264 if (start_lvl == 0) { 265 sbap = &ip->i_db[0]; 266 soff = start_lbn; 267 } else { 268 idp = &start_ap[start_lvl - 1]; 269 if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->e2fs_bsize, 270 &sbp)) { 271 brelse(sbp); 272 return (ENOSPC); 273 } 274 sbap = (u_int *)sbp->b_data; 275 soff = idp->in_off; 276 } 277 /* 278 * If the block range spans two block maps, get the second map. 279 */ 280 ebap = NULL; 281 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 282 ssize = len; 283 } else { 284 #ifdef INVARIANTS 285 if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) 286 panic("ext2_reallocblks: start == end"); 287 #endif 288 ssize = len - (idp->in_off + 1); 289 if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->e2fs_bsize, 290 &ebp)) 291 goto fail; 292 ebap = (u_int *)ebp->b_data; 293 } 294 /* 295 * Find the preferred location for the cluster. 296 */ 297 EXT2_LOCK(ump); 298 pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); 299 /* 300 * Search the block map looking for an allocation of the desired size. 301 */ 302 if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, 303 len, ext2_clusteralloc)) == 0) { 304 EXT2_UNLOCK(ump); 305 goto fail; 306 } 307 /* 308 * We have found a new contiguous block. 309 * 310 * First we have to replace the old block pointers with the new 311 * block pointers in the inode and indirect blocks associated 312 * with the file. 313 */ 314 SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc, 315 ip->i_number, start_lbn, end_lbn); 316 blkno = newblk; 317 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 318 if (i == ssize) { 319 bap = ebap; 320 soff = -i; 321 } 322 #ifdef INVARIANTS 323 if (buflist->bs_children[i]->b_bio2.bio_offset != 324 fsbtodoff(fs, *bap)) 325 panic("ext2_reallocblks: alloc mismatch"); 326 #endif 327 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap); 328 *bap++ = blkno; 329 } 330 /* 331 * Next we must write out the modified inode and indirect blocks. 332 * For strict correctness, the writes should be synchronous since 333 * the old block values may have been written to disk. In practise 334 * they are almost never written, but if we are concerned about 335 * strict correctness, the `doasyncfree' flag should be set to zero. 336 * 337 * The test on `doasyncfree' should be changed to test a flag 338 * that shows whether the associated buffers and inodes have 339 * been written. The flag should be set when the cluster is 340 * started and cleared whenever the buffer or inode is flushed. 341 * We can then check below to see if it is set, and do the 342 * synchronous write only when it has been cleared. 343 */ 344 if (sbap != &ip->i_db[0]) { 345 if (doasyncfree) 346 bdwrite(sbp); 347 else 348 bwrite(sbp); 349 } else { 350 ip->i_flag |= IN_CHANGE | IN_UPDATE; 351 if (!doasyncfree) 352 ext2_update(vp, 1); 353 } 354 if (ssize < len) { 355 if (doasyncfree) 356 bdwrite(ebp); 357 else 358 bwrite(ebp); 359 } 360 /* 361 * Last, free the old blocks and assign the new blocks to the buffers. 362 */ 363 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { 364 ext2_blkfree(ip, dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), 365 fs->e2fs_bsize); 366 buflist->bs_children[i]->b_bio2.bio_offset = fsbtodoff(fs, blkno); 367 SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno); 368 } 369 370 return (0); 371 372 fail: 373 if (ssize < len) 374 brelse(ebp); 375 if (sbap != &ip->i_db[0]) 376 brelse(sbp); 377 return (ENOSPC); 378 } 379 380 /* 381 * Allocate an inode in the filesystem. 382 * 383 */ 384 int 385 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 386 { 387 struct timespec ts; 388 struct m_ext2fs *fs; 389 struct ext2mount *ump; 390 struct inode *pip; 391 struct inode *ip; 392 struct vnode *vp; 393 ino_t ino, ipref; 394 int error, cg; 395 396 *vpp = NULL; 397 pip = VTOI(pvp); 398 fs = pip->i_e2fs; 399 ump = pip->i_ump; 400 401 EXT2_LOCK(ump); 402 if (fs->e2fs_ficount == 0) 403 goto noinodes; 404 /* 405 * If it is a directory then obtain a cylinder group based on 406 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is 407 * always the next inode. 408 */ 409 if ((mode & IFMT) == IFDIR) { 410 cg = ext2_dirpref(pip); 411 if (fs->e2fs_contigdirs[cg] < 255) 412 fs->e2fs_contigdirs[cg]++; 413 } else { 414 cg = ino_to_cg(fs, pip->i_number); 415 if (fs->e2fs_contigdirs[cg] > 0) 416 fs->e2fs_contigdirs[cg]--; 417 } 418 ipref = cg * fs->e2fs_ipg + 1; 419 ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); 420 if (ino == 0) 421 goto noinodes; 422 restart: 423 if ((vp = ext2_ihashget(ump->um_dev, ino)) != NULL) { 424 printf("ext2_valloc: vp %p exists for inode %lu\n", vp, ino); 425 return (EEXIST); 426 } 427 if (ext2_alloc_vnode(ump->um_mountp, ino, &vp) == -1) 428 goto restart; 429 ip = VTOI(vp); 430 431 if ((error = ext2_vinit(vp->v_mount, &vp)) != 0) { 432 *vpp = NULL; 433 vp->v_type = VBAD; 434 vx_put(vp); 435 return (error); 436 } 437 438 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS) 439 && (S_ISREG(mode) || S_ISDIR(mode))) 440 ext4_ext_tree_init(ip); 441 else 442 memset(ip->i_data, 0, sizeof(ip->i_data)); 443 444 /* 445 * Set up a new generation number for this inode. 446 * Avoid zero values. 447 */ 448 do { 449 ip->i_gen = karc4random(); 450 } while (ip->i_gen == 0); 451 452 vfs_timestamp(&ts); 453 ip->i_birthtime = ts.tv_sec; 454 ip->i_birthnsec = ts.tv_nsec; 455 456 /* 457 * Finish inode initialization now that aliasing has been resolved. 458 */ 459 vref(ip->i_devvp); 460 /* 461 * Return the locked and refd vnode. 462 */ 463 vx_downgrade(vp); /* downgrade VX lock to VN lock */ 464 *vpp = vp; 465 466 return (0); 467 468 noinodes: 469 EXT2_UNLOCK(ump); 470 SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes"); 471 return (ENOSPC); 472 } 473 474 /* 475 * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h 476 */ 477 uint64_t 478 e2fs_gd_get_b_bitmap(struct ext2_gd *gd) 479 { 480 481 return (((uint64_t)(le32toh(gd->ext4bgd_b_bitmap_hi)) << 32) | 482 le32toh(gd->ext2bgd_b_bitmap)); 483 } 484 485 uint64_t 486 e2fs_gd_get_i_bitmap(struct ext2_gd *gd) 487 { 488 489 return (((uint64_t)(le32toh(gd->ext4bgd_i_bitmap_hi)) << 32) | 490 le32toh(gd->ext2bgd_i_bitmap)); 491 } 492 493 uint64_t 494 e2fs_gd_get_i_tables(struct ext2_gd *gd) 495 { 496 497 return (((uint64_t)(le32toh(gd->ext4bgd_i_tables_hi)) << 32) | 498 le32toh(gd->ext2bgd_i_tables)); 499 } 500 501 static uint32_t 502 e2fs_gd_get_nbfree(struct ext2_gd *gd) 503 { 504 505 return (((uint32_t)(le16toh(gd->ext4bgd_nbfree_hi)) << 16) | 506 le16toh(gd->ext2bgd_nbfree)); 507 } 508 509 static void 510 e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val) 511 { 512 513 gd->ext2bgd_nbfree = htole16(val & 0xffff); 514 gd->ext4bgd_nbfree_hi = htole16(val >> 16); 515 } 516 517 static uint32_t 518 e2fs_gd_get_nifree(struct ext2_gd *gd) 519 { 520 521 return (((uint32_t)(le16toh(gd->ext4bgd_nifree_hi)) << 16) | 522 le16toh(gd->ext2bgd_nifree)); 523 } 524 525 static void 526 e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val) 527 { 528 529 gd->ext2bgd_nifree = htole16(val & 0xffff); 530 gd->ext4bgd_nifree_hi = htole16(val >> 16); 531 } 532 533 uint32_t 534 e2fs_gd_get_ndirs(struct ext2_gd *gd) 535 { 536 537 return (((uint32_t)(le16toh(gd->ext4bgd_ndirs_hi)) << 16) | 538 le16toh(gd->ext2bgd_ndirs)); 539 } 540 541 static void 542 e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val) 543 { 544 545 gd->ext2bgd_ndirs = htole16(val & 0xffff); 546 gd->ext4bgd_ndirs_hi = htole16(val >> 16); 547 } 548 549 static uint32_t 550 e2fs_gd_get_i_unused(struct ext2_gd *gd) 551 { 552 return ((uint32_t)(le16toh(gd->ext4bgd_i_unused_hi) << 16) | 553 le16toh(gd->ext4bgd_i_unused)); 554 } 555 556 static void 557 e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val) 558 { 559 560 gd->ext4bgd_i_unused = htole16(val & 0xffff); 561 gd->ext4bgd_i_unused_hi = htole16(val >> 16); 562 } 563 564 /* 565 * Find a cylinder to place a directory. 566 * 567 * The policy implemented by this algorithm is to allocate a 568 * directory inode in the same cylinder group as its parent 569 * directory, but also to reserve space for its files inodes 570 * and data. Restrict the number of directories which may be 571 * allocated one after another in the same cylinder group 572 * without intervening allocation of files. 573 * 574 * If we allocate a first level directory then force allocation 575 * in another cylinder group. 576 * 577 */ 578 static u_long 579 ext2_dirpref(struct inode *pip) 580 { 581 struct m_ext2fs *fs; 582 int cg, prefcg, cgsize; 583 uint64_t avgbfree, minbfree; 584 u_int avgifree, avgndir, curdirsize; 585 u_int minifree, maxndir; 586 u_int mincg, minndir; 587 u_int dirsize, maxcontigdirs; 588 589 mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); 590 fs = pip->i_e2fs; 591 592 avgifree = fs->e2fs_ficount / fs->e2fs_gcount; 593 avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount; 594 avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; 595 596 /* 597 * Force allocation in another cg if creating a first level dir. 598 */ 599 ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); 600 if (ITOV(pip)->v_flag & VROOT) { 601 prefcg = karc4random() % fs->e2fs_gcount; 602 mincg = prefcg; 603 minndir = fs->e2fs_ipg; 604 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 605 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 606 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 607 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 608 mincg = cg; 609 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 610 } 611 for (cg = 0; cg < prefcg; cg++) 612 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && 613 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && 614 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { 615 mincg = cg; 616 minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); 617 } 618 return (mincg); 619 } 620 /* 621 * Count various limits which used for 622 * optimal allocation of a directory inode. 623 */ 624 maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); 625 minifree = avgifree - avgifree / 4; 626 if (minifree < 1) 627 minifree = 1; 628 minbfree = avgbfree - avgbfree / 4; 629 if (minbfree < 1) 630 minbfree = 1; 631 cgsize = fs->e2fs_fsize * fs->e2fs_fpg; 632 dirsize = AVGDIRSIZE; 633 curdirsize = avgndir ? 634 (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; 635 if (dirsize < curdirsize) 636 dirsize = curdirsize; 637 maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); 638 maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); 639 if (maxcontigdirs == 0) 640 maxcontigdirs = 1; 641 642 /* 643 * Limit number of dirs in one cg and reserve space for 644 * regular files, but only if we have no deficit in 645 * inodes or space. 646 */ 647 prefcg = ino_to_cg(fs, pip->i_number); 648 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 649 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 650 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 651 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 652 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 653 return (cg); 654 } 655 for (cg = 0; cg < prefcg; cg++) 656 if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && 657 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && 658 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { 659 if (fs->e2fs_contigdirs[cg] < maxcontigdirs) 660 return (cg); 661 } 662 /* 663 * This is a backstop when we have deficit in space. 664 */ 665 for (cg = prefcg; cg < fs->e2fs_gcount; cg++) 666 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 667 return (cg); 668 for (cg = 0; cg < prefcg; cg++) 669 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) 670 break; 671 return (cg); 672 } 673 674 /* 675 * Select the desired position for the next block in a file. 676 * 677 * we try to mimic what Remy does in inode_getblk/block_getblk 678 * 679 * we note: blocknr == 0 means that we're about to allocate either 680 * a direct block or a pointer block at the first level of indirection 681 * (In other words, stuff that will go in i_db[] or i_ib[]) 682 * 683 * blocknr != 0 means that we're allocating a block that is none 684 * of the above. Then, blocknr tells us the number of the block 685 * that will hold the pointer 686 */ 687 e4fs_daddr_t 688 ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, 689 e2fs_daddr_t blocknr) 690 { 691 struct m_ext2fs *fs; 692 int tmp; 693 694 fs = ip->i_e2fs; 695 696 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 697 698 /* 699 * If the next block is actually what we thought it is, then set the 700 * goal to what we thought it should be. 701 */ 702 if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) 703 return ip->i_next_alloc_goal; 704 705 /* 706 * Now check whether we were provided with an array that basically 707 * tells us previous blocks to which we want to stay close. 708 */ 709 if (bap) 710 for (tmp = indx - 1; tmp >= 0; tmp--) 711 if (bap[tmp]) 712 return (le32toh(bap[tmp])); 713 714 /* 715 * Else lets fall back to the blocknr or, if there is none, follow 716 * the rule that a block should be allocated near its inode. 717 */ 718 return (blocknr ? blocknr : 719 (e2fs_daddr_t)(ip->i_block_group * 720 EXT2_BLOCKS_PER_GROUP(fs)) + le32toh(fs->e2fs->e2fs_first_dblock)); 721 } 722 723 /* 724 * Implement the cylinder overflow algorithm. 725 * 726 * The policy implemented by this algorithm is: 727 * 1) allocate the block in its requested cylinder group. 728 * 2) quadradically rehash on the cylinder group number. 729 * 3) brute force search for a free block. 730 */ 731 static e4fs_daddr_t 732 ext2_hashalloc(struct inode *ip, int cg, long pref, int size, 733 daddr_t (*allocator) (struct inode *, int, daddr_t, int)) 734 { 735 struct m_ext2fs *fs; 736 e4fs_daddr_t result; 737 int i, icg = cg; 738 739 mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); 740 fs = ip->i_e2fs; 741 /* 742 * 1: preferred cylinder group 743 */ 744 result = (*allocator)(ip, cg, pref, size); 745 if (result) 746 return (result); 747 /* 748 * 2: quadratic rehash 749 */ 750 for (i = 1; i < fs->e2fs_gcount; i *= 2) { 751 cg += i; 752 if (cg >= fs->e2fs_gcount) 753 cg -= fs->e2fs_gcount; 754 result = (*allocator)(ip, cg, 0, size); 755 if (result) 756 return (result); 757 } 758 /* 759 * 3: brute force search 760 * Note that we start at i == 2, since 0 was checked initially, 761 * and 1 is always checked in the quadratic rehash. 762 */ 763 cg = (icg + 2) % fs->e2fs_gcount; 764 for (i = 2; i < fs->e2fs_gcount; i++) { 765 result = (*allocator)(ip, cg, 0, size); 766 if (result) 767 return (result); 768 cg++; 769 if (cg == fs->e2fs_gcount) 770 cg = 0; 771 } 772 return (0); 773 } 774 775 static uint64_t 776 ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg) 777 { 778 779 if (!ext2_cg_has_sb(fs, cg)) 780 return (0); 781 782 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG)) 783 return (le32toh(fs->e2fs->e3fs_first_meta_bg)); 784 785 return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) / 786 EXT2_DESCS_PER_BLOCK(fs)); 787 } 788 789 static uint64_t 790 ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg) 791 { 792 unsigned long metagroup; 793 int first, last; 794 795 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 796 first = metagroup * EXT2_DESCS_PER_BLOCK(fs); 797 last = first + EXT2_DESCS_PER_BLOCK(fs) - 1; 798 799 if (cg == first || cg == first + 1 || cg == last) 800 return (1); 801 802 return (0); 803 } 804 805 uint64_t 806 ext2_cg_number_gdb(struct m_ext2fs *fs, int cg) 807 { 808 unsigned long first_meta_bg, metagroup; 809 810 first_meta_bg = le32toh(fs->e2fs->e3fs_first_meta_bg); 811 metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); 812 813 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 814 metagroup < first_meta_bg) 815 return (ext2_cg_number_gdb_nometa(fs, cg)); 816 817 return ext2_cg_number_gdb_meta(fs, cg); 818 } 819 820 static int 821 ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg) 822 { 823 int number; 824 825 number = ext2_cg_has_sb(fs, cg); 826 827 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || 828 cg < le32toh(fs->e2fs->e3fs_first_meta_bg) * 829 EXT2_DESCS_PER_BLOCK(fs)) { 830 if (number) { 831 number += ext2_cg_number_gdb(fs, cg); 832 number += le16toh(fs->e2fs->e2fs_reserved_ngdb); 833 } 834 } else { 835 number += ext2_cg_number_gdb(fs, cg); 836 } 837 838 return (number); 839 } 840 841 static void 842 ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) 843 { 844 int i; 845 846 if (start_bit >= end_bit) 847 return; 848 849 for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) 850 setbit(bitmap, i); 851 if (i < end_bit) 852 memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); 853 } 854 855 static int 856 ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block) 857 { 858 859 return ((block - le32toh(fs->e2fs->e2fs_first_dblock)) / 860 fs->e2fs_bsize); 861 } 862 863 static int 864 ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg) 865 { 866 867 return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0); 868 } 869 870 static int 871 ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp) 872 { 873 int bit, bit_max, inodes_per_block; 874 uint64_t start, tmp; 875 876 if (!(le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_BLOCK_UNINIT)) 877 return (0); 878 879 memset(bp->b_data, 0, fs->e2fs_bsize); 880 881 bit_max = ext2_number_base_meta_blocks(fs, cg); 882 if ((bit_max >> 3) >= fs->e2fs_bsize) 883 return (EINVAL); 884 885 for (bit = 0; bit < bit_max; bit++) 886 setbit(bp->b_data, bit); 887 888 start = (uint64_t)cg * fs->e2fs_bpg + 889 le32toh(fs->e2fs->e2fs_first_dblock); 890 891 /* Set bits for block and inode bitmaps, and inode table. */ 892 tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]); 893 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 894 ext2_block_in_group(fs, tmp, cg)) 895 setbit(bp->b_data, tmp - start); 896 897 tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]); 898 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 899 ext2_block_in_group(fs, tmp, cg)) 900 setbit(bp->b_data, tmp - start); 901 902 tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]); 903 inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs); 904 while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + 905 fs->e2fs_ipg / inodes_per_block ) { 906 if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || 907 ext2_block_in_group(fs, tmp, cg)) 908 setbit(bp->b_data, tmp - start); 909 tmp++; 910 } 911 912 /* 913 * Also if the number of blocks within the group is less than 914 * the blocksize * 8 ( which is the size of bitmap ), set rest 915 * of the block bitmap to 1 916 */ 917 ext2_mark_bitmap_end(fs->e2fs_bpg, fs->e2fs_bsize * 8, 918 bp->b_data); 919 920 /* Clean the flag */ 921 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 922 fs->e2fs_gd[cg].ext4bgd_flags) & ~EXT2_BG_BLOCK_UNINIT); 923 924 return (0); 925 } 926 927 static int 928 ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg) 929 { 930 struct ext2_gd *gd; 931 uint64_t group_first_block; 932 unsigned int offset, max_bit; 933 934 if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) { 935 /* 936 * It is not possible to check block bitmap in case of this 937 * feature, because the inode and block bitmaps and inode table 938 * blocks may not be in the group at all. 939 * So, skip check in this case. 940 */ 941 return (0); 942 } 943 944 gd = &fs->e2fs_gd[cg]; 945 max_bit = fs->e2fs_fpg; 946 group_first_block = ((uint64_t)cg) * fs->e2fs_fpg + 947 le32toh(fs->e2fs->e2fs_first_dblock); 948 949 /* Check block bitmap block number */ 950 offset = e2fs_gd_get_b_bitmap(gd) - group_first_block; 951 if (offset >= max_bit || !isset(bp->b_data, offset)) { 952 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 953 "bad block bitmap, group", cg); 954 return (EINVAL); 955 } 956 957 /* Check inode bitmap block number */ 958 offset = e2fs_gd_get_i_bitmap(gd) - group_first_block; 959 if (offset >= max_bit || !isset(bp->b_data, offset)) { 960 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 961 "bad inode bitmap", cg); 962 return (EINVAL); 963 } 964 965 /* Check inode table */ 966 offset = e2fs_gd_get_i_tables(gd) - group_first_block; 967 if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) { 968 SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, 969 "bad inode table, group", cg); 970 return (EINVAL); 971 } 972 973 return (0); 974 } 975 976 /* 977 * Determine whether a block can be allocated. 978 * 979 * Check to see if a block of the appropriate size is available, 980 * and if it is, allocate it. 981 */ 982 static daddr_t 983 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) 984 { 985 struct m_ext2fs *fs; 986 struct buf *bp; 987 struct ext2mount *ump; 988 daddr_t bno, runstart, runlen; 989 int bit, loc, end, error, start; 990 char *bbp; 991 /* XXX ondisk32 */ 992 fs = ip->i_e2fs; 993 ump = ip->i_ump; 994 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 995 return (0); 996 997 EXT2_UNLOCK(ump); 998 error = bread(ip->i_devvp, fsbtodoff(fs, 999 e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1000 (int)fs->e2fs_bsize, &bp); 1001 if (error) 1002 goto fail; 1003 1004 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1005 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1006 error = ext2_cg_block_bitmap_init(fs, cg, bp); 1007 if (error) 1008 goto fail; 1009 1010 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1011 } 1012 error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp); 1013 if (error) 1014 goto fail; 1015 1016 error = ext2_b_bitmap_validate(fs,bp, cg); 1017 if (error) 1018 goto fail; 1019 1020 /* 1021 * Check, that another thread did not not allocate the last block in 1022 * this group while we were waiting for the buffer. 1023 */ 1024 if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) 1025 goto fail; 1026 1027 bbp = (char *)bp->b_data; 1028 1029 if (dtog(fs, bpref) != cg) 1030 bpref = 0; 1031 if (bpref != 0) { 1032 bpref = dtogd(fs, bpref); 1033 /* 1034 * if the requested block is available, use it 1035 */ 1036 if (isclr(bbp, bpref)) { 1037 bno = bpref; 1038 goto gotit; 1039 } 1040 } 1041 /* 1042 * no blocks in the requested cylinder, so take next 1043 * available one in this cylinder group. 1044 * first try to get 8 contigous blocks, then fall back to a single 1045 * block. 1046 */ 1047 if (bpref) 1048 start = dtogd(fs, bpref) / NBBY; 1049 else 1050 start = 0; 1051 end = howmany(fs->e2fs_fpg, NBBY) - start; 1052 retry: 1053 runlen = 0; 1054 runstart = 0; 1055 for (loc = start; loc < end; loc++) { 1056 if (bbp[loc] == (char)0xff) { 1057 runlen = 0; 1058 continue; 1059 } 1060 1061 /* Start of a run, find the number of high clear bits. */ 1062 if (runlen == 0) { 1063 bit = fls(bbp[loc]); 1064 runlen = NBBY - bit; 1065 runstart = loc * NBBY + bit; 1066 } else if (bbp[loc] == 0) { 1067 /* Continue a run. */ 1068 runlen += NBBY; 1069 } else { 1070 /* 1071 * Finish the current run. If it isn't long 1072 * enough, start a new one. 1073 */ 1074 bit = ffs(bbp[loc]) - 1; 1075 runlen += bit; 1076 if (runlen >= 8) { 1077 bno = runstart; 1078 goto gotit; 1079 } 1080 1081 /* Run was too short, start a new one. */ 1082 bit = fls(bbp[loc]); 1083 runlen = NBBY - bit; 1084 runstart = loc * NBBY + bit; 1085 } 1086 1087 /* If the current run is long enough, use it. */ 1088 if (runlen >= 8) { 1089 bno = runstart; 1090 goto gotit; 1091 } 1092 } 1093 if (start != 0) { 1094 end = start; 1095 start = 0; 1096 goto retry; 1097 } 1098 bno = ext2_mapsearch(fs, bbp, bpref); 1099 if (bno < 0) 1100 goto fail; 1101 1102 gotit: 1103 #ifdef INVARIANTS 1104 if (isset(bbp, bno)) { 1105 printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n", 1106 cg, (intmax_t)bno, fs->e2fs_fsmnt); 1107 panic("ext2fs_alloccg: dup alloc"); 1108 } 1109 #endif 1110 setbit(bbp, bno); 1111 EXT2_LOCK(ump); 1112 ext2_clusteracct(fs, bbp, cg, bno, -1); 1113 fs->e2fs_fbcount--; 1114 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1115 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1116 fs->e2fs_fmod = 1; 1117 EXT2_UNLOCK(ump); 1118 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1119 bdwrite(bp); 1120 return (((uint64_t)cg) * fs->e2fs_fpg + 1121 le32toh(fs->e2fs->e2fs_first_dblock) + bno); 1122 1123 fail: 1124 brelse(bp); 1125 EXT2_LOCK(ump); 1126 return (0); 1127 } 1128 1129 /* 1130 * Determine whether a cluster can be allocated. 1131 */ 1132 static daddr_t 1133 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) 1134 { 1135 struct m_ext2fs *fs; 1136 struct ext2mount *ump; 1137 struct buf *bp; 1138 char *bbp; 1139 int bit, error, got, i, loc, run; 1140 int32_t *lp; 1141 daddr_t bno; 1142 1143 fs = ip->i_e2fs; 1144 ump = ip->i_ump; 1145 1146 if (fs->e2fs_maxcluster[cg] < len) 1147 return (0); 1148 1149 EXT2_UNLOCK(ump); 1150 error = bread(ip->i_devvp, 1151 fsbtodoff(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1152 (int)fs->e2fs_bsize, &bp); 1153 if (error) 1154 goto fail_lock; 1155 1156 bbp = (char *)bp->b_data; 1157 EXT2_LOCK(ump); 1158 /* 1159 * Check to see if a cluster of the needed size (or bigger) is 1160 * available in this cylinder group. 1161 */ 1162 lp = &fs->e2fs_clustersum[cg].cs_sum[len]; 1163 for (i = len; i <= fs->e2fs_contigsumsize; i++) 1164 if (*lp++ > 0) 1165 break; 1166 if (i > fs->e2fs_contigsumsize) { 1167 /* 1168 * Update the cluster summary information to reflect 1169 * the true maximum-sized cluster so that future cluster 1170 * allocation requests can avoid reading the bitmap only 1171 * to find no cluster. 1172 */ 1173 lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; 1174 for (i = len - 1; i > 0; i--) 1175 if (*lp-- > 0) 1176 break; 1177 fs->e2fs_maxcluster[cg] = i; 1178 goto fail; 1179 } 1180 EXT2_UNLOCK(ump); 1181 1182 /* Search the bitmap to find a big enough cluster like in FFS. */ 1183 if (dtog(fs, bpref) != cg) 1184 bpref = 0; 1185 if (bpref != 0) 1186 bpref = dtogd(fs, bpref); 1187 loc = bpref / NBBY; 1188 bit = 1 << (bpref % NBBY); 1189 for (run = 0, got = bpref; got < fs->e2fs_fpg; got++) { 1190 if ((bbp[loc] & bit) != 0) 1191 run = 0; 1192 else { 1193 run++; 1194 if (run == len) 1195 break; 1196 } 1197 if ((got & (NBBY - 1)) != (NBBY - 1)) 1198 bit <<= 1; 1199 else { 1200 loc++; 1201 bit = 1; 1202 } 1203 } 1204 1205 if (got >= fs->e2fs_fpg) 1206 goto fail_lock; 1207 1208 /* Allocate the cluster that we found. */ 1209 for (i = 1; i < len; i++) 1210 if (!isclr(bbp, got - run + i)) 1211 panic("ext2_clusteralloc: map mismatch"); 1212 1213 bno = got - run + 1; 1214 if (bno >= fs->e2fs_fpg) 1215 panic("ext2_clusteralloc: allocated out of group"); 1216 1217 EXT2_LOCK(ump); 1218 for (i = 0; i < len; i += fs->e2fs_fpb) { 1219 setbit(bbp, bno + i); 1220 ext2_clusteracct(fs, bbp, cg, bno + i, -1); 1221 fs->e2fs_fbcount--; 1222 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1223 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); 1224 } 1225 fs->e2fs_fmod = 1; 1226 EXT2_UNLOCK(ump); 1227 1228 bdwrite(bp); 1229 return (cg * fs->e2fs_fpg + le32toh(fs->e2fs->e2fs_first_dblock) 1230 + bno); 1231 1232 fail_lock: 1233 EXT2_LOCK(ump); 1234 fail: 1235 brelse(bp); 1236 return (0); 1237 } 1238 1239 static int 1240 ext2_zero_inode_table(struct inode *ip, int cg) 1241 { 1242 struct m_ext2fs *fs; 1243 struct buf *bp; 1244 int i, all_blks, used_blks; 1245 1246 fs = ip->i_e2fs; 1247 1248 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_INODE_ZEROED) 1249 return (0); 1250 1251 all_blks = le16toh(fs->e2fs->e2fs_inode_size) * fs->e2fs_ipg / 1252 fs->e2fs_bsize; 1253 1254 used_blks = howmany(fs->e2fs_ipg - 1255 e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]), 1256 fs->e2fs_bsize / EXT2_INODE_SIZE(fs)); 1257 1258 for (i = 0; i < all_blks - used_blks; i++) { 1259 bp = getblk(ip->i_devvp, fsbtodoff(fs, 1260 e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i), 1261 fs->e2fs_bsize, 0, 0); 1262 if (!bp) 1263 return (EIO); 1264 1265 vfs_bio_clrbuf(bp); 1266 bawrite(bp); 1267 } 1268 1269 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1270 fs->e2fs_gd[cg].ext4bgd_flags) | EXT2_BG_INODE_ZEROED); 1271 1272 return (0); 1273 } 1274 1275 static void 1276 ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last) 1277 { 1278 int i; 1279 1280 for (i = first; i <= last; i++) 1281 bitmap[i] = 0xff; 1282 } 1283 1284 1285 /* 1286 * Determine whether an inode can be allocated. 1287 * 1288 * Check to see if an inode is available, and if it is, 1289 * allocate it using tode in the specified cylinder group. 1290 */ 1291 static daddr_t 1292 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) 1293 { 1294 struct m_ext2fs *fs; 1295 struct buf *bp; 1296 struct ext2mount *ump; 1297 int error, start, len, ifree, ibytes; 1298 char *ibp, *loc; 1299 1300 ipref--; /* to avoid a lot of (ipref -1) */ 1301 if (ipref == -1) 1302 ipref = 0; 1303 fs = ip->i_e2fs; 1304 ump = ip->i_ump; 1305 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) 1306 return (0); 1307 EXT2_UNLOCK(ump); 1308 error = bread(ip->i_devvp, fsbtodoff(fs, 1309 e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1310 (int)fs->e2fs_bsize, &bp); 1311 if (error) { 1312 brelse(bp); 1313 EXT2_LOCK(ump); 1314 return (0); 1315 } 1316 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1317 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1318 if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & 1319 EXT2_BG_INODE_UNINIT) { 1320 ibytes = fs->e2fs_ipg / 8; 1321 memset(bp->b_data, 0, ibytes - 1); 1322 ext2_fix_bitmap_tail(bp->b_data, ibytes, 1323 fs->e2fs_bsize - 1); 1324 fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh( 1325 fs->e2fs_gd[cg].ext4bgd_flags) & 1326 ~EXT2_BG_INODE_UNINIT); 1327 } 1328 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1329 error = ext2_zero_inode_table(ip, cg); 1330 if (error) { 1331 brelse(bp); 1332 EXT2_LOCK(ump); 1333 return (0); 1334 } 1335 } 1336 error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp); 1337 if (error) { 1338 brelse(bp); 1339 EXT2_LOCK(ump); 1340 return (0); 1341 } 1342 if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) { 1343 /* 1344 * Another thread allocated the last i-node in this 1345 * group while we were waiting for the buffer. 1346 */ 1347 brelse(bp); 1348 EXT2_LOCK(ump); 1349 return (0); 1350 } 1351 ibp = (char *)bp->b_data; 1352 if (ipref) { 1353 ipref %= fs->e2fs_ipg; 1354 if (isclr(ibp, ipref)) 1355 goto gotit; 1356 } 1357 start = ipref / NBBY; 1358 len = howmany(fs->e2fs_ipg - ipref, NBBY); 1359 loc = memcchr(&ibp[start], 0xff, len); 1360 if (loc == NULL) { 1361 len = start + 1; 1362 start = 0; 1363 loc = memcchr(&ibp[start], 0xff, len); 1364 if (loc == NULL) { 1365 SDT_PROBE3(ext2fs, , alloc, 1366 ext2_nodealloccg_bmap_corrupted, cg, ipref, 1367 fs->e2fs_fsmnt); 1368 brelse(bp); 1369 EXT2_LOCK(ump); 1370 return (0); 1371 } 1372 } 1373 ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; 1374 gotit: 1375 setbit(ibp, ipref); 1376 EXT2_LOCK(ump); 1377 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1378 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1); 1379 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || 1380 EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { 1381 ifree = fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]); 1382 if (ipref + 1 > ifree) 1383 e2fs_gd_set_i_unused(&fs->e2fs_gd[cg], 1384 fs->e2fs_ipg - (ipref + 1)); 1385 } 1386 fs->e2fs_ficount--; 1387 fs->e2fs_fmod = 1; 1388 if ((mode & IFMT) == IFDIR) { 1389 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1390 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1); 1391 fs->e2fs_total_dir++; 1392 } 1393 EXT2_UNLOCK(ump); 1394 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1395 bdwrite(bp); 1396 return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1); 1397 } 1398 1399 /* 1400 * Free a block or fragment. 1401 * 1402 */ 1403 void 1404 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) 1405 { 1406 struct m_ext2fs *fs; 1407 struct buf *bp; 1408 struct ext2mount *ump; 1409 int cg, error; 1410 char *bbp; 1411 1412 fs = ip->i_e2fs; 1413 ump = ip->i_ump; 1414 cg = dtog(fs, bno); 1415 if (bno >= fs->e2fs_bcount) { 1416 SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, 1417 ip->i_number, bno); 1418 return; 1419 } 1420 error = bread(ip->i_devvp, 1421 fsbtodoff(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), 1422 (int)fs->e2fs_bsize, &bp); 1423 if (error) { 1424 brelse(bp); 1425 return; 1426 } 1427 bbp = (char *)bp->b_data; 1428 bno = dtogd(fs, bno); 1429 if (isclr(bbp, bno)) { 1430 panic("ext2_blkfree: freeing free block %lld, fs=%s", 1431 (long long)bno, fs->e2fs_fsmnt); 1432 } 1433 clrbit(bbp, bno); 1434 EXT2_LOCK(ump); 1435 ext2_clusteracct(fs, bbp, cg, bno, 1); 1436 fs->e2fs_fbcount++; 1437 e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], 1438 e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); 1439 fs->e2fs_fmod = 1; 1440 EXT2_UNLOCK(ump); 1441 ext2_gd_b_bitmap_csum_set(fs, cg, bp); 1442 bdwrite(bp); 1443 } 1444 1445 /* 1446 * Free an inode. 1447 * 1448 */ 1449 int 1450 ext2_vfree(struct vnode *pvp, ino_t ino, int mode) 1451 { 1452 struct m_ext2fs *fs; 1453 struct inode *pip; 1454 struct buf *bp; 1455 struct ext2mount *ump; 1456 int error, cg; 1457 char *ibp; 1458 1459 pip = VTOI(pvp); 1460 fs = pip->i_e2fs; 1461 ump = pip->i_ump; 1462 if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) 1463 panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", 1464 pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); 1465 1466 cg = ino_to_cg(fs, ino); 1467 error = bread(pip->i_devvp, 1468 fsbtodoff(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), 1469 (int)fs->e2fs_bsize, &bp); 1470 if (error) { 1471 brelse(bp); 1472 return (0); 1473 } 1474 ibp = (char *)bp->b_data; 1475 ino = (ino - 1) % fs->e2fs_ipg; 1476 if (isclr(ibp, ino)) { 1477 SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, 1478 fs->e2fs_fsmnt, ino); 1479 if (fs->e2fs_ronly == 0) 1480 panic("ext2_vfree: freeing free inode"); 1481 } 1482 clrbit(ibp, ino); 1483 EXT2_LOCK(ump); 1484 fs->e2fs_ficount++; 1485 e2fs_gd_set_nifree(&fs->e2fs_gd[cg], 1486 e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); 1487 if ((mode & IFMT) == IFDIR) { 1488 e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], 1489 e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); 1490 fs->e2fs_total_dir--; 1491 } 1492 fs->e2fs_fmod = 1; 1493 EXT2_UNLOCK(ump); 1494 ext2_gd_i_bitmap_csum_set(fs, cg, bp); 1495 bdwrite(bp); 1496 return (0); 1497 } 1498 1499 /* 1500 * Find a block in the specified cylinder group. 1501 * 1502 * It is a panic if a request is made to find a block if none are 1503 * available. 1504 */ 1505 static daddr_t 1506 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) 1507 { 1508 char *loc; 1509 int start, len; 1510 1511 /* 1512 * find the fragment by searching through the free block 1513 * map for an appropriate bit pattern 1514 */ 1515 if (bpref) 1516 start = dtogd(fs, bpref) / NBBY; 1517 else 1518 start = 0; 1519 len = howmany(fs->e2fs_fpg, NBBY) - start; 1520 loc = memcchr(&bbp[start], 0xff, len); 1521 if (loc == NULL) { 1522 len = start + 1; 1523 start = 0; 1524 loc = memcchr(&bbp[start], 0xff, len); 1525 if (loc == NULL) { 1526 panic("ext2_mapsearch: map corrupted: start=%d, len=%d," 1527 "fs=%s", start, len, fs->e2fs_fsmnt); 1528 /* NOTREACHED */ 1529 } 1530 } 1531 return ((loc - bbp) * NBBY + ffs(~*loc) - 1); 1532 } 1533 1534 int 1535 ext2_cg_has_sb(struct m_ext2fs *fs, int cg) 1536 { 1537 int a3, a5, a7; 1538 1539 if (cg == 0) 1540 return (1); 1541 1542 if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { 1543 if (cg == le32toh(fs->e2fs->e4fs_backup_bgs[0]) || 1544 cg == le32toh(fs->e2fs->e4fs_backup_bgs[1])) 1545 return (1); 1546 return (0); 1547 } 1548 1549 if ((cg <= 1) || 1550 !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) 1551 return (1); 1552 1553 if (!(cg & 1)) 1554 return (0); 1555 1556 for (a3 = 3, a5 = 5, a7 = 7; 1557 a3 <= cg || a5 <= cg || a7 <= cg; 1558 a3 *= 3, a5 *= 5, a7 *= 7) 1559 if (cg == a3 || cg == a5 || cg == a7) 1560 return (1); 1561 return (0); 1562 } 1563