/*	$OpenBSD: ffs_alloc.c,v 1.92 2011/09/18 23:20:28 bluhm Exp $	*/
/*	$NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $	*/

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.11 (Berkeley) 10/27/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/stdint.h>
#include <sys/time.h>

#include <uvm/uvm_extern.h>

#include <dev/rndvar.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#define ffs_fserr(fs, uid, cp) do {				\
	log(LOG_ERR, "uid %u on %s: %s\n", (uid),		\
	    (fs)->fs_fsmnt, (cp));				\
} while (0)

daddr64_t	ffs_alloccg(struct inode *, int, daddr64_t, int);
struct buf *	ffs_cgread(struct fs *, struct inode *, int);
daddr64_t	ffs_alloccgblk(struct inode *, struct buf *, daddr64_t);
daddr64_t	ffs_clusteralloc(struct inode *, int, daddr64_t, int);
ino_t		ffs_dirpref(struct inode *);
daddr64_t	ffs_fragextend(struct inode *, int, daddr64_t, int, int);
daddr64_t	ffs_hashalloc(struct inode *, int, daddr64_t, int,
		    daddr64_t (*)(struct inode *, int, daddr64_t, int));
daddr64_t	ffs_nodealloccg(struct inode *, int, daddr64_t, int);
daddr64_t	ffs_mapsearch(struct fs *, struct cg *, daddr64_t, int);

int ffs1_reallocblks(void *);
#ifdef FFS2
int ffs2_reallocblks(void *);
#endif

#ifdef DIAGNOSTIC
int	ffs_checkblk(struct inode *, daddr64_t, long);
#endif

static const struct timeval	fserr_interval = { 2, 0 };


/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
int
ffs_alloc(struct inode *ip, daddr64_t lbn, daddr64_t bpref, int size,
    struct ucred *cred, daddr64_t *bnp)
{
	static struct timeval fsfull_last;
	struct fs *fs;
	daddr64_t bno;
	int cg;
	int error;

	*bnp = 0;
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* DIAGNOSTIC */
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;

	if ((error = ufs_quota_alloc_blocks(ip, btodb(size), cred)) != 0)
		return (error);

	/*
	 * Start allocation in the preferred block's cylinder group or
	 * the file's inode's cylinder group if no preferred block was
	 * specified.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);

	/* Try allocating a block. */
	bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
	if (bno > 0) {
		/* allocation successful, update inode data */
		DIP_ADD(ip, blocks, btodb(size));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}

	/* Restore user's disk quota because allocation failed. */
	(void) ufs_quota_free_blocks(ip, btodb(size), cred);

nospace:
	if (ratecheck(&fsfull_last, &fserr_interval)) {
		ffs_fserr(fs, cred->cr_uid, "file system full");
		uprintf("\n%s: write failed, file system is full\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block is given, and a preference
 * and new size is also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
int
ffs_realloccg(struct inode *ip, daddr64_t lbprev, daddr64_t bpref, int osize,
    int nsize, struct ucred *cred, struct buf **bpp, daddr64_t *blknop)
{
	static struct timeval fsfull_last;
	struct fs *fs;
	struct buf *bp = NULL;
	daddr64_t quota_updated = 0;
	int cg, request, error;
	daddr64_t bprev, bno;

	if (bpp != NULL)
		*bpp = NULL;
	fs = ip->i_fs;
#ifdef DIAGNOSTIC
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		    "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* DIAGNOSTIC */
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;

	bprev = DIP(ip, db[lbprev]);

	if (bprev == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %lld, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}

	/*
	 * Allocate the extra space in the buffer.
	 */
	if (bpp != NULL) {
		if ((error = bread(ITOV(ip), lbprev, fs->fs_bsize, &bp)) != 0)
			goto error;
		bp->b_bcount = osize;
	}

	if ((error = ufs_quota_alloc_blocks(ip, btodb(nsize - osize), cred))
	    != 0)
		goto error;

	quota_updated = btodb(nsize - osize);

	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
		DIP_ADD(ip, blocks, btodb(nsize - osize));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (bpp != NULL) {
			if (bp->b_blkno != fsbtodb(fs, bno))
				panic("ffs_realloccg: bad blockno");
#ifdef DIAGNOSTIC
			if (nsize > bp->b_bufsize)
				panic("ffs_realloccg: small buf");
#endif
			bp->b_bcount = nsize;
			bp->b_flags |= B_DONE;
			bzero(bp->b_data + osize, (u_int)nsize - osize);
			*bpp = bp;
		}
		if (blknop != NULL) {
			*blknop = bno;
		}
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
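	/*
	 * Example of the hysteresis implemented below (an editorial
	 * illustration, not part of the original commentary): on a
	 * filesystem with fs_dsize = 1,000,000 fragments and the
	 * default fs_minfree of 5%, FS_OPTSPACE switches to FS_OPTTIME
	 * once free fragments drop to 25,000 (half of the 5% reserve),
	 * and FS_OPTTIME switches back once free fragments climb to
	 * 30,000 (within 2% of the reserve), so the mode does not flap
	 * on every allocation.
	 */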
	switch (fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg);
	if (bno <= 0)
		goto nospace;

	(void) uvm_vnp_uncache(ITOV(ip));
	if (!DOINGSOFTDEP(ITOV(ip)))
		ffs_blkfree(ip, bprev, (long)osize);
	if (nsize < request)
		ffs_blkfree(ip, bno + numfrags(fs, nsize),
		    (long)(request - nsize));
	DIP_ADD(ip, blocks, btodb(nsize - osize));
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if (bpp != NULL) {
		bp->b_blkno = fsbtodb(fs, bno);
#ifdef DIAGNOSTIC
		if (nsize > bp->b_bufsize)
			panic("ffs_realloccg: small buf 2");
#endif
		bp->b_bcount = nsize;
		bp->b_flags |= B_DONE;
		bzero(bp->b_data + osize, (u_int)nsize - osize);
		*bpp = bp;
	}
	if (blknop != NULL) {
		*blknop = bno;
	}
	return (0);

nospace:
	if (ratecheck(&fsfull_last, &fserr_interval)) {
		ffs_fserr(fs, cred->cr_uid, "file system full");
		uprintf("\n%s: write failed, file system is full\n",
		    fs->fs_fsmnt);
	}
	error = ENOSPC;

error:
	if (bp != NULL) {
		brelse(bp);
		bp = NULL;
	}

	/*
	 * Restore user's disk quota because allocation failed.
	 */
	if (quota_updated != 0)
		(void)ufs_quota_free_blocks(ip, quota_updated, cred);

	return error;
}

/*
 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
 *
 * The vnode and an array of buffer pointers for a range of sequential
 * logical blocks to be made contiguous are given. The allocator attempts
 * to find a range of sequential blocks starting as close as possible to
 * an fs_rotdelay offset from the end of the allocation for the logical
 * block immediately preceding the current range. If successful, the
 * physical block numbers in the buffer pointers and in the inode are
 * changed to reflect the new allocation. If unsuccessful, the allocation
 * is left unchanged. The success in doing the reallocation is returned.
 * Note that the error return is not reflected back to the user. Rather
 * the previous block allocation will be used.
 */

int doasyncfree = 1;	/* push updated maps with delayed rather than sync writes */
int doreallocblks = 1;	/* enable the block reallocation machinery */
int prtrealloc = 0;	/* trace reallocations (DEBUG kernels only) */

int
ffs1_reallocblks(void *v)
{
	struct vop_reallocblks_args *ap = v;
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	int32_t *bap, *sbap, *ebap = NULL;
	struct cluster_save *buflist;
	daddr64_t start_lbn, end_lbn, soff, newblk, blkno;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, pref, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);
	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;

#ifdef DIAGNOSTIC
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs1_reallocblks: unallocated block 1");

	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs1_reallocblks: non-logical cluster");

	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);
	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs1_reallocblks: non-physical cluster %d", i);
#endif
	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);
	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_ffs1_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (int32_t *)sbp->b_data;
		soff = idp->in_off;
	}
	/*
	 * Find the preferred location for the cluster.
	 */
	pref = ffs1_blkpref(ip, start_lbn, soff, sbap);
	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_lvl > 1 &&
		    start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs1_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, &ebp))
			goto fail;
		ebap = (int32_t *)ebp->b_data;
	}
	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, len,
	    ffs_clusteralloc)) == 0)
		goto fail;
	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
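	/*
	 * Note on the pointer-rewrite loop below (an editorial
	 * illustration, not from the original commentary): with
	 * len = 5 and ssize = 3, the first 3 pointers live in the
	 * first map (sbap, starting at index soff) and the last 2 in
	 * the second map (ebap). When i reaches ssize the loop
	 * switches maps and sets soff = -i so that soff + i, the
	 * offset handed to the softdep routines, restarts at 0 within
	 * ebap.
	 */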
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %u, lbns %lld-%lld\n\told:", ip->i_number,
		    start_lbn, end_lbn);
#endif
	blkno = newblk;
	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs1_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs1_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %d,", *bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_ffs1_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}

		*bap++ = blkno;
	}
	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_ffs1_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree) {
			UFS_UPDATE(ip, MNT_WAIT);
		}
	}
	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}
	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ip,
			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
			    fs->fs_bsize);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs1_reallocblks: unallocated block 3");
		if (prtrealloc)
			printf(" %lld,", blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif
	return (0);

fail:
	if (ssize < len)
		brelse(ebp);
	if (sbap != &ip->i_ffs1_db[0])
		brelse(sbp);
	return (ENOSPC);
}

#ifdef FFS2
int
ffs2_reallocblks(void *v)
{
	struct vop_reallocblks_args *ap = v;
	struct fs *fs;
	struct inode *ip;
	struct vnode *vp;
	struct buf *sbp, *ebp;
	daddr64_t *bap, *sbap, *ebap = NULL;
	struct cluster_save *buflist;
	daddr64_t start_lbn, end_lbn;
	daddr64_t soff, newblk, blkno, pref;
	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
	int i, len, start_lvl, end_lvl, ssize;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;

	if (fs->fs_contigsumsize <= 0)
		return (ENOSPC);

	buflist = ap->a_buflist;
	len = buflist->bs_nchildren;
	start_lbn = buflist->bs_children[0]->b_lblkno;
	end_lbn = start_lbn + len - 1;

#ifdef DIAGNOSTIC
	for (i = 0; i < len; i++)
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs2_reallocblks: unallocated block 1");

	for (i = 1; i < len; i++)
		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
			panic("ffs2_reallocblks: non-logical cluster");

	blkno = buflist->bs_children[0]->b_blkno;
	ssize = fsbtodb(fs, fs->fs_frag);

	for (i = 1; i < len - 1; i++)
		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
			panic("ffs2_reallocblks: non-physical cluster %d", i);
#endif

	/*
	 * If the latest allocation is in a new cylinder group, assume that
	 * the filesystem has decided to move and do not force it back to
	 * the previous cylinder group.
	 */
	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
		return (ENOSPC);
	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
		return (ENOSPC);

	/*
	 * Get the starting offset and block map for the first block.
	 */
	if (start_lvl == 0) {
		sbap = &ip->i_din2->di_db[0];
		soff = start_lbn;
	} else {
		idp = &start_ap[start_lvl - 1];
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, &sbp)) {
			brelse(sbp);
			return (ENOSPC);
		}
		sbap = (daddr64_t *)sbp->b_data;
		soff = idp->in_off;
	}

	/*
	 * If the block range spans two block maps, get the second map.
	 */
	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
		ssize = len;
	} else {
#ifdef DIAGNOSTIC
		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
			panic("ffs2_reallocblk: start == end");
#endif
		ssize = len - (idp->in_off + 1);
		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, &ebp))
			goto fail;
		ebap = (daddr64_t *)ebp->b_data;
	}

	/*
	 * Find the preferred location for the cluster.
	 */
	pref = ffs2_blkpref(ip, start_lbn, soff, sbap);

	/*
	 * Search the block map looking for an allocation of the desired size.
	 */
	if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
	    len, ffs_clusteralloc)) == 0)
		goto fail;

	/*
	 * We have found a new contiguous block.
	 *
	 * First we have to replace the old block pointers with the new
	 * block pointers in the inode and indirect blocks associated
	 * with the file.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("realloc: ino %u, lbns %lld-%lld\n\told:", ip->i_number,
		    start_lbn, end_lbn);
#endif

	blkno = newblk;

	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (i == ssize) {
			bap = ebap;
			soff = -i;
		}
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs2_reallocblks: unallocated block 2");
		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
			panic("ffs2_reallocblks: alloc mismatch");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %lld,", *bap);
#endif
		if (DOINGSOFTDEP(vp)) {
			if (sbap == &ip->i_din2->di_db[0] && i < ssize)
				softdep_setup_allocdirect(ip, start_lbn + i,
				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
				    buflist->bs_children[i]);
			else
				softdep_setup_allocindir_page(ip, start_lbn + i,
				    i < ssize ? sbp : ebp, soff + i, blkno,
				    *bap, buflist->bs_children[i]);
		}
		*bap++ = blkno;
	}

	/*
	 * Next we must write out the modified inode and indirect blocks.
	 * For strict correctness, the writes should be synchronous since
	 * the old block values may have been written to disk. In practice
	 * they are almost never written, but if we are concerned about
	 * strict correctness, the `doasyncfree' flag should be set to zero.
	 *
	 * The test on `doasyncfree' should be changed to test a flag
	 * that shows whether the associated buffers and inodes have
	 * been written. The flag should be set when the cluster is
	 * started and cleared whenever the buffer or inode is flushed.
	 * We can then check below to see if it is set, and do the
	 * synchronous write only when it has been cleared.
	 */
	if (sbap != &ip->i_din2->di_db[0]) {
		if (doasyncfree)
			bdwrite(sbp);
		else
			bwrite(sbp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (!doasyncfree)
			ffs_update(ip, NULL, NULL, MNT_WAIT);
	}

	if (ssize < len) {
		if (doasyncfree)
			bdwrite(ebp);
		else
			bwrite(ebp);
	}

	/*
	 * Last, free the old blocks and assign the new blocks to the buffers.
	 */
#ifdef DEBUG
	if (prtrealloc)
		printf("\n\tnew:");
#endif
	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
		if (!DOINGSOFTDEP(vp))
			ffs_blkfree(ip, dbtofsb(fs,
			    buflist->bs_children[i]->b_blkno), fs->fs_bsize);
		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DIAGNOSTIC
		if (!ffs_checkblk(ip,
		    dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
			panic("ffs2_reallocblks: unallocated block 3");
#endif
#ifdef DEBUG
		if (prtrealloc)
			printf(" %lld,", blkno);
#endif
	}
#ifdef DEBUG
	if (prtrealloc) {
		prtrealloc--;
		printf("\n");
	}
#endif

	return (0);

fail:
	if (ssize < len)
		brelse(ebp);

	if (sbap != &ip->i_din2->di_db[0])
		brelse(sbp);

	return (ENOSPC);
}
#endif /* FFS2 */

int
ffs_reallocblks(void *v)
{
#ifdef FFS2
	struct vop_reallocblks_args *ap = v;
#endif

	if (!doreallocblks)
		return (ENOSPC);

#ifdef FFS2
	if (VTOI(ap->a_vp)->i_ump->um_fstype == UM_UFS2)
		return (ffs2_reallocblks(v));
#endif

	return (ffs1_reallocblks(v));
}

/*
 * Allocate an inode in the file system.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
int
ffs_inode_alloc(struct inode *pip, mode_t mode, struct ucred *cred,
    struct vnode **vpp)
{
	static struct timeval fsnoinodes_last;
	struct vnode *pvp = ITOV(pip);
	struct fs *fs;
	struct inode *ip;
	ino_t ino, ipref;
	int cg, error;

	*vpp = NULL;
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);

	/*
	 * Track the number of dirs created one after another in the
	 * same cg without intervening files.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	error = VFS_VGET(pvp->v_mount, ino, vpp);
	if (error) {
		ffs_inode_free(pip, ino, mode);
		return (error);
	}

	ip = VTOI(*vpp);

	if (DIP(ip, mode)) {
		printf("mode = 0%o, inum = %u, fs = %s\n",
		    DIP(ip, mode), ip->i_number, fs->fs_fsmnt);
		panic("ffs_valloc: dup alloc");
	}

	if (DIP(ip, blocks)) {
		printf("free inode %s/%d had %lld blocks\n",
		    fs->fs_fsmnt, ino, (daddr64_t)DIP(ip, blocks));
		DIP_ASSIGN(ip, blocks, 0);
	}

	DIP_ASSIGN(ip, flags, 0);

	/*
	 * Set up a new generation number for this inode.
	 * XXX - just increment for now, this is wrong! (millert)
	 * Need a way to preserve randomization.
	 */
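	/*
	 * Background (editorial note, not from the original source):
	 * the generation number is embedded in NFS file handles so
	 * that reuse of an inode number can be detected as a stale
	 * handle; the fixups below only guarantee that it ends up
	 * nonzero and not all-ones.
	 */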
	if (DIP(ip, gen) != 0)
		DIP_ADD(ip, gen, 1);
	if (DIP(ip, gen) == 0)
		DIP_ASSIGN(ip, gen, arc4random() & INT_MAX);

	if (DIP(ip, gen) == 0 || DIP(ip, gen) == -1)
		DIP_ASSIGN(ip, gen, 1);	/* Shouldn't happen */

	return (0);

noinodes:
	if (ratecheck(&fsnoinodes_last, &fserr_interval)) {
		ffs_fserr(fs, cred->cr_uid, "out of inodes");
		uprintf("\n%s: create/symlink failed, no inodes free\n",
		    fs->fs_fsmnt);
	}
	return (ENOSPC);
}

/*
 * Find a cylinder group to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
ino_t
ffs_dirpref(struct inode *pip)
{
	struct fs *fs;
	int cg, prefcg, dirsize, cgsize;
	int avgifree, avgbfree, avgndir, curdirsize;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	if (ITOV(pip)->v_flag & VROOT) {
		prefcg = (arc4random() & INT_MAX) % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		cg = mincg;
		goto end;
	} else
		prefcg = ino_to_cg(fs, pip->i_number);

	/*
	 * Count various limits used for optimal allocation of a
	 * directory inode.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - (avgifree / 4);
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - (avgbfree / 4);
	if (minbfree < 1)
		minbfree = 1;

	cgsize = fs->fs_fsize * fs->fs_fpg;
	dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
	curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
	if (dirsize < curdirsize)
		dirsize = curdirsize;
	if (dirsize <= 0)
		maxcontigdirs = 0;		/* dirsize overflowed */
	else
		maxcontigdirs = min(avgbfree * fs->fs_bsize / dirsize, 255);
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
		    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
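	/*
	 * Worked example (editorial, not from the original source): if
	 * the filesystem averages 1000 free inodes and 400 free blocks
	 * per cg, then minifree = 750 and minbfree = 300, so the scan
	 * below only accepts a cg that is at most 25% below average on
	 * either count, still has fewer than maxndir directories, and
	 * has not had maxcontigdirs directories created back to back.
	 */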
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				goto end;
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				goto end;
		}
	/*
	 * This is a backstop when we have a deficit in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			goto end;
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			goto end;
end:
	return ((ino_t)(fs->fs_ipg * cg));
}

/*
 * Select the desired position for the next block in a file. The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 */
int32_t
ffs1_blkpref(struct inode *ip, daddr64_t lbn, int indx, int32_t *bap)
{
	struct fs *fs;
	int cg, avgbfree, startcg;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}

	return (bap[indx - 1] + fs->fs_frag);
}
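/*
 * Illustration of the section policy above (editorial, not from the
 * original source): with fs_maxbpg = 2048, map index 2048 (the first
 * block of a new section) or a hole in the map triggers the rotor scan
 * for a cylinder group with an above-average number of free blocks,
 * while every other block is simply laid out contiguously after its
 * predecessor (bap[indx - 1] + fs_frag, the final return above).
 */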
/*
 * Same as above, for UFS2.
 */
#ifdef FFS2
int64_t
ffs2_blkpref(struct inode *ip, daddr64_t lbn, int indx, int64_t *bap)
{
	struct fs *fs;
	int cg, avgbfree, startcg;

	fs = ip->i_fs;

	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}

		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = ino_to_cg(fs, ip->i_number) +
			    lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;

		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;

		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
				return (cgbase(fs, cg) + fs->fs_frag);

		for (cg = 0; cg < startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
				return (cgbase(fs, cg) + fs->fs_frag);

		return (0);
	}

	/*
	 * We always just try to lay things out contiguously.
	 */
	return (bap[indx - 1] + fs->fs_frag);
}
#endif /* FFS2 */

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
/*VARARGS5*/
daddr64_t
ffs_hashalloc(struct inode *ip, int cg, daddr64_t pref, int size,
    daddr64_t (*allocator)(struct inode *, int, daddr64_t, int))
{
	struct fs *fs;
	daddr64_t result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}

struct buf *
ffs_cgread(struct fs *fs, struct inode *ip, int cg)
{
	struct buf *bp;

	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, &bp)) {
		brelse(bp);
		return (NULL);
	}

	if (!cg_chkmagic((struct cg *)bp->b_data)) {
		brelse(bp);
		return (NULL);
	}

	return bp;
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
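/*
 * Example (editorial, not from the original source): extending a piece
 * from 2 fragments (osize) to 4 (nsize) in place succeeds only if the
 * piece does not cross a block boundary and the bitmap bits for
 * fragments bno + 2 and bno + 3 are still set (i.e. free); otherwise
 * the caller falls back to allocating a fresh location.
 */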
daddr64_t
ffs_fragextend(struct inode *ip, int cg, daddr64_t bprev, int osize, int nsize)
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	daddr64_t bno;
	int i, frags, bbase;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (0);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}

	if (!(bp = ffs_cgread(fs, ip, cg)))
		return (0);

	cgp = (struct cg *)bp->b_data;
	cgp->cg_ffs2_time = cgp->cg_time = time_second;

	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (0);
		}
	/*
	 * The current fragment can be extended:
	 * deduct the count on the fragment being extended into,
	 * increase the count on the remaining fragment (if any),
	 * and allocate the extended piece.
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod = 1;
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, bprev);

	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
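/*
 * Note (editorial, not from the original source): cg_frsum[k] counts
 * the free runs of exactly k fragments in this cylinder group, so the
 * scan over cg_frsum below asks "is any free run at least as big as
 * the request?" and settles on the smallest adequate run before
 * resorting to breaking up a full block.
 */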
daddr64_t
ffs_alloccg(struct inode *ip, int cg, daddr64_t bpref, int size)
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	daddr64_t bno, blkno;
	int i, frags, allocsiz;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);

	if (!(bp = ffs_cgread(fs, ip, cg)))
		return (0);

	cgp = (struct cg *)bp->b_data;
	if (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize) {
		brelse(bp);
		return (0);
	}

	cgp->cg_ffs2_time = cgp->cg_time = time_second;

	if (size == fs->fs_bsize) {
		/* allocate and return a complete data block */
		bno = ffs_alloccgblk(ip, bp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * Check to see if any fragments are already available.
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary.
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * No fragments were available, so a block will be
		 * allocated, and hacked up.
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (0);
		}
		bno = ffs_alloccgblk(ip, bp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (0);
	}

	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;

	blkno = cgbase(fs, cg) + bno;
	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, blkno);
	bdwrite(bp);
	return (blkno);
}

/*
 * Allocate a block in a cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
daddr64_t
ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr64_t bpref)
{
	struct fs *fs;
	struct cg *cgp;
	daddr64_t bno, blkno;
	u_int8_t *blksfree;
	int cylno;

	fs = ip->i_fs;
	cgp = (struct cg *) bp->b_data;
	blksfree = cg_blksfree(cgp);

	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx)
		bpref = cgp->cg_rotor;
	else {
		bpref = blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * If the requested block is available, use it.
		 */
		if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
			goto gotit;
	}

	/*
	 * Take the next available block in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);

	cgp->cg_rotor = bno;

gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;

	if (fs->fs_magic != FS_UFS2_MAGIC) {
		cylno = cbtocylno(fs, bno);
		cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
		cg_blktot(cgp)[cylno]--;
	}

	fs->fs_fmod = 1;
	blkno = cgbase(fs, cgp->cg_cgx) + bno;

	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_blkmapdep(bp, fs, blkno);

	return (blkno);
}

/*
 * Determine whether a cluster can be allocated.
 *
 * We do not currently check for optimal rotational layout if there
 * are multiple choices in the same cylinder group. Instead we just
 * take the first one that we find following bpref.
 */
daddr64_t
ffs_clusteralloc(struct inode *ip, int cg, daddr64_t bpref, int len)
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	int i, got, run, bno, bit, map;
	u_char *mapp;
	int32_t *lp;

	fs = ip->i_fs;
	if (fs->fs_maxcluster[cg] < len)
		return (0);

	if (!(bp = ffs_cgread(fs, ip, cg)))
		return (0);

	cgp = (struct cg *)bp->b_data;

	/*
	 * Check to see if a cluster of the needed size (or bigger) is
	 * available in this cylinder group.
	 */
	lp = &cg_clustersum(cgp)[len];
	for (i = len; i <= fs->fs_contigsumsize; i++)
		if (*lp++ > 0)
			break;
	if (i > fs->fs_contigsumsize) {
		/*
		 * This is the first time looking for a cluster in this
		 * cylinder group. Update the cluster summary information
		 * to reflect the true maximum sized cluster so that
		 * future cluster allocation requests can avoid reading
		 * the cylinder group map only to find no clusters.
		 */
		lp = &cg_clustersum(cgp)[len - 1];
		for (i = len - 1; i > 0; i--)
			if (*lp-- > 0)
				break;
		fs->fs_maxcluster[cg] = i;
		goto fail;
	}
	/*
	 * Search the cluster map to find a big enough cluster.
	 * We take the first one that we find, even if it is larger
	 * than we need as we prefer to get one close to the previous
	 * block allocation. We do not search before the current
	 * preference point as we do not want to allocate a block
	 * that is allocated before the previous one (as we will
	 * then have to wait for another pass of the elevator
	 * algorithm before it will be read). We prefer to fail and
	 * be recalled to try an allocation in the next cylinder group.
	 */
	if (dtog(fs, bpref) != cg)
		bpref = 0;
	else
		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
	map = *mapp++;
	bit = 1 << (bpref % NBBY);
	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
		if ((map & bit) == 0) {
			run = 0;
		} else {
			run++;
			if (run == len)
				break;
		}
		if ((got & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	if (got >= cgp->cg_nclusterblks)
		goto fail;
	/*
	 * Allocate the cluster that we have found.
	 */
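	/*
	 * Note on the arithmetic below (editorial, not from the
	 * original source): the scan above stopped with `got' indexing
	 * the last block of the run and run == len, so the cluster
	 * starts at block got - run + 1; blkstofrags() converts that
	 * block number into the fragment number ffs_alloccgblk()
	 * expects.
	 */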
	cgp->cg_ffs2_time = cgp->cg_time = time_second;

#ifdef DIAGNOSTIC
	for (i = 1; i <= len; i++)
		if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
			panic("ffs_clusteralloc: map mismatch");
#endif
	bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1);
#ifdef DIAGNOSTIC
	if (dtog(fs, bno) != cg)
		panic("ffs_clusteralloc: allocated out of group");
#endif

	len = blkstofrags(fs, len);
	for (i = 0; i < len; i += fs->fs_frag)
		if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
			panic("ffs_clusteralloc: lost block");
	bdwrite(bp);
	return (bno);

fail:
	brelse(bp);
	return (0);
}

/* inode allocation routine */
daddr64_t
ffs_nodealloccg(struct inode *ip, int cg, daddr64_t ipref, int mode)
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	int start, len, loc, map, i;
#ifdef FFS2
	struct buf *ibp = NULL;
	struct ufs2_dinode *dp2;
#endif

	/*
	 * For efficiency, before looking at the bitmaps for free inodes,
	 * check the counters kept in the superblock cylinder group summaries,
	 * and in the cylinder group itself.
	 */
	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);

	if (!(bp = ffs_cgread(fs, ip, cg)))
		return (0);

	cgp = (struct cg *)bp->b_data;
	if (cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (0);
	}

	/*
	 * We are committed to the allocation from now on, so update the time
	 * on the cylinder group.
	 */
	cgp->cg_ffs2_time = cgp->cg_time = time_second;

	/*
	 * If there was a preferred location for the new inode, try to find it.
	 */
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit; /* inode is free, grab it. */
	}

	/*
	 * Otherwise, look for the next available inode, starting at cg_irotor
	 * (the position in the bitmap of the last used inode).
	 */
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		/*
		 * If we didn't find a free inode in the upper part of the
		 * bitmap (from cg_irotor to the end), then look at the bottom
		 * part (from 0 to cg_irotor).
		 */
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			/*
			 * If we failed again, then either the bitmap or the
			 * counters kept for the cylinder group are wrong.
			 */
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}

	/* skpc() returns the position relative to the end */
	i = start + len - loc;

	/*
	 * Okay, so now in 'i' we have the location in the bitmap of a byte
	 * holding a free inode. Find the corresponding bit and set it,
	 * updating cg_irotor as well, accordingly.
	 */
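	/*
	 * Example (editorial, not from the original source): if byte i
	 * of the inosused map is 0xef (11101111), the loop below walks
	 * the single-bit masks 0x01, 0x02, ... and stops at 0x10, i.e.
	 * bit 4 is clear, so the free inode is i * NBBY + 4 and
	 * cg_irotor is left pointing at it.
	 */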
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}

	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */

gotit:

#ifdef FFS2
	/*
	 * For FFS2, check if all inodes in this cylinder group have been used
	 * at least once. If they haven't, and we are allocating an inode past
	 * the last allocated block of inodes, read in a block and initialize
	 * all inodes in it.
	 */
	if (fs->fs_magic == FS_UFS2_MAGIC &&
	    /* Inode is beyond last initialized block of inodes? */
	    ipref + INOPB(fs) > cgp->cg_initediblk &&
	    /* Has any inode not been used at least once? */
	    cgp->cg_initediblk < cgp->cg_ffs2_niblk) {

		ibp = getblk(ip->i_devvp, fsbtodb(fs,
		    ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)),
		    (int)fs->fs_bsize, 0, 0);

		bzero(ibp->b_data, (int)fs->fs_bsize);
		dp2 = (struct ufs2_dinode *)(ibp->b_data);

		/* Give each inode a positive generation number */
		for (i = 0; i < INOPB(fs); i++) {
			dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1;
			dp2++;
		}

		/* Update the counter of initialized inodes */
		cgp->cg_initediblk += INOPB(fs);
	}
#endif /* FFS2 */

	if (DOINGSOFTDEP(ITOV(ip)))
		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);

	setbit(cg_inosused(cgp), ipref);

	/* Update the counters we keep on free inodes */
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1; /* file system was modified */

	/* Update the counters we keep on allocated directories */
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}

	bdwrite(bp);

#ifdef FFS2
	if (ibp != NULL)
		bawrite(ibp);
#endif

	/* Return the allocated inode number */
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
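/*
 * Example of the fragment accounting below (editorial, not from the
 * original source): freeing 2 fragments that sit next to an existing
 * free run of 2 in the same block first retires the old run from
 * cg_frsum, sets the freed bits, and recounts the runs, turning
 * frsum[2]-- into frsum[4]++; if the whole block ends up free, the
 * fragment counters are rolled back and it is accounted as a whole
 * free block instead.
 */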
void
ffs_blkfree(struct inode *ip, daddr64_t bno, long size)
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	daddr64_t blkno;
	int i, cg, blk, frags, bbase;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
		printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if ((u_int)bno >= fs->fs_size) {
		printf("bad block %lld, ino %u\n", bno, ip->i_number);
		ffs_fserr(fs, DIP(ip, uid), "bad block");
		return;
	}
	if (!(bp = ffs_cgread(fs, ip, cg)))
		return;

	cgp = (struct cg *)bp->b_data;
	cgp->cg_ffs2_time = cgp->cg_time = time_second;

	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		blkno = fragstoblks(fs, bno);
		if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) {
			printf("dev = 0x%x, block = %lld, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("ffs_blkfree: freeing free block");
		}
		ffs_setblock(fs, cg_blksfree(cgp), blkno);
		ffs_clusteracct(fs, cgp, blkno, 1);
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;

		if (fs->fs_magic != FS_UFS2_MAGIC) {
			i = cbtocylno(fs, bno);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
			cg_blktot(cgp)[i]++;
		}

	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * Decrement the counts associated with the old frags.
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * Deallocate the fragment.
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%x, block = %lld, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("ffs_blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * Add back in counts associated with the new frags.
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * If a complete block has been reassembled, account for it.
		 */
		blkno = fragstoblks(fs, bbase);
		if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(fs, cgp, blkno, 1);
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;

			if (fs->fs_magic != FS_UFS2_MAGIC) {
				i = cbtocylno(fs, bbase);
				cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
				cg_blktot(cgp)[i]++;
			}
		}
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
}

int
ffs_inode_free(struct inode *pip, ino_t ino, mode_t mode)
{
	struct vnode *pvp = ITOV(pip);

	if (DOINGSOFTDEP(pvp)) {
		softdep_freefile(pvp, ino, mode);
		return (0);
	}

	return (ffs_freefile(pip, ino, mode));
}

/*
 * Do the actual free operation.
 * The specified inode is placed back in the free map.
 */
int
ffs_freefile(struct inode *pip, ino_t ino, mode_t mode)
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	int cg;

	fs = pip->i_fs;
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ffs_freefile: range: dev = 0x%x, ino = %d, fs = %s",
		    pip->i_dev, ino, fs->fs_fsmnt);

	cg = ino_to_cg(fs, ino);
	if (!(bp = ffs_cgread(fs, pip, cg)))
		return (0);

	cgp = (struct cg *)bp->b_data;
	cgp->cg_ffs2_time = cgp->cg_time = time_second;

	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%x, ino = %u, fs = %s\n",
		    pip->i_dev, ino, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ffs_freefile: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod = 1;
	bdwrite(bp);
	return (0);
}

#ifdef DIAGNOSTIC
/*
 * Verify allocation of a block or fragment. Returns true if block or
 * fragment is allocated, false if it is free.
 */
int
ffs_checkblk(struct inode *ip, daddr64_t bno, long size)
{
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	int i, frags, free;

	fs = ip->i_fs;
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("bsize = %d, size = %ld, fs = %s\n",
		    fs->fs_bsize, size, fs->fs_fsmnt);
		panic("ffs_checkblk: bad size");
	}
	if ((u_int)bno >= fs->fs_size)
		panic("ffs_checkblk: bad block %lld", bno);

	if (!(bp = ffs_cgread(fs, ip, dtog(fs, bno))))
		return (0);

	cgp = (struct cg *)bp->b_data;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
	} else {
		frags = numfrags(fs, size);
		for (free = 0, i = 0; i < frags; i++)
			if (isset(cg_blksfree(cgp), bno + i))
				free++;
		if (free != 0 && free != frags)
			panic("ffs_checkblk: partially free fragment");
	}
	brelse(bp);
	return (!free);
}
#endif /* DIAGNOSTIC */


/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
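/*
 * Note on the search below (editorial, not from the original source):
 * fragtbl[fs_frag] maps every possible byte of the free-fragment
 * bitmap to a summary of the free-run lengths it contains, and scanc()
 * returns on the first byte whose summary matches the mask derived
 * from allocsiz, so bytes holding no adequate run are skipped without
 * inspecting individual bits.
 */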
daddr64_t
ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr64_t bpref, int allocsiz)
{
	daddr64_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * Find the fragment by searching through the free block
	 * map for an appropriate bit pattern.
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
	    (u_char *)fragtbl[fs->fs_frag],
	    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
		    (u_char *)fragtbl[fs->fs_frag],
		    (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("ffs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * Found the byte in the map; sift through the bits to find
	 * the selected frag.
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %lld, fs = %s\n", bno, fs->fs_fsmnt);
	panic("ffs_alloccg: block not in map");
	return (-1);
}

/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 */
void
ffs_clusteracct(struct fs *fs, struct cg *cgp, daddr64_t blkno, int cnt)
{
	int32_t *sump;
	int32_t *lp;
	u_char *freemapp, *mapp;
	int i, start, end, forw, back, map, bit;

	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp);
	sump = cg_clustersum(cgp);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= cgp->cg_nclusterblks)
		end = cgp->cg_nclusterblks;
	mapp = &freemapp[start / NBBY];
	map = *mapp++;
	bit = 1 << (start % NBBY);
	for (i = start; i < end; i++) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	mapp = &freemapp[start / NBBY];
	map = *mapp--;
	bit = 1 << (start % NBBY);
	for (i = start; i > end; i--) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != 0) {
			bit >>= 1;
		} else {
			map = *mapp--;
			bit = 1 << (NBBY - 1);
		}
	}
	back = start - i;
	/*
	 * Account for the old cluster and the possibly new forward and
	 * back clusters.
	 */
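	/*
	 * Worked example (editorial, not from the original source):
	 * freeing a block that sits between a free run of 2 and a free
	 * run of 3 gives back = 2 and forw = 3, so the merged run gets
	 * sump[6]++ while sump[2]-- and sump[3]-- retire the old runs;
	 * an allocation (cnt == -1) applies the same updates with
	 * opposite signs.
	 */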
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	sump[i] += cnt;
	if (back > 0)
		sump[back] -= cnt;
	if (forw > 0)
		sump[forw] -= cnt;
	/*
	 * Update cluster summary information.
	 */
	lp = &sump[fs->fs_contigsumsize];
	for (i = fs->fs_contigsumsize; i > 0; i--)
		if (*lp-- > 0)
			break;
	fs->fs_maxcluster[cgp->cg_cgx] = i;
}