1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 30 * $FreeBSD: src/sys/ufs/ffs/ffs_alloc.c,v 1.64.2.2 2001/09/21 19:15:21 dillon Exp $ 31 */ 32 33 #include "opt_quota.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/buf.h> 38 #include <sys/conf.h> 39 #include <sys/malloc.h> 40 #include <sys/proc.h> 41 #include <sys/vnode.h> 42 #include <sys/mount.h> 43 #include <sys/kernel.h> 44 #include <sys/sysctl.h> 45 #include <sys/syslog.h> 46 47 #include <sys/taskqueue.h> 48 #include <machine/inttypes.h> 49 50 #include <sys/buf2.h> 51 52 #include "quota.h" 53 #include "inode.h" 54 #include "ufs_extern.h" 55 #include "ufsmount.h" 56 57 #include "fs.h" 58 #include "ffs_extern.h" 59 60 typedef ufs_daddr_t allocfcn_t (struct inode *ip, int cg, ufs_daddr_t bpref, 61 int size); 62 63 static ufs_daddr_t ffs_alloccg (struct inode *, int, ufs_daddr_t, int); 64 static ufs_daddr_t 65 ffs_alloccgblk (struct inode *, struct buf *, ufs_daddr_t); 66 static void ffs_blkfree_cg(struct fs *, struct vnode *, cdev_t , ino_t, 67 uint32_t , ufs_daddr_t, long ); 68 #ifdef DIAGNOSTIC 69 static int ffs_checkblk (struct inode *, ufs_daddr_t, long); 70 #endif 71 static void ffs_clusteracct (struct fs *, struct cg *, ufs_daddr_t, 72 int); 73 static ufs_daddr_t ffs_clusteralloc (struct inode *, int, ufs_daddr_t, 74 int); 75 static ino_t ffs_dirpref (struct inode *); 76 static ufs_daddr_t ffs_fragextend (struct inode *, int, long, int, int); 77 static void ffs_fserr (struct fs *, uint, char *); 78 static u_long ffs_hashalloc 79 (struct inode *, int, long, int, allocfcn_t *); 80 static ino_t ffs_nodealloccg (struct inode *, int, ufs_daddr_t, int); 81 static ufs_daddr_t ffs_mapsearch (struct fs *, struct cg *, ufs_daddr_t, 82 int); 83 84 /* 85 * Allocate a block in the filesystem. 86 * 87 * The size of the requested block is given, which must be some 88 * multiple of fs_fsize and <= fs_bsize. 89 * A preference may be optionally specified. If a preference is given 90 * the following hierarchy is used to allocate a block: 91 * 1) allocate the requested block. 92 * 2) allocate a rotationally optimal block in the same cylinder. 93 * 3) allocate a block in the same cylinder group. 94 * 4) quadradically rehash into other cylinder groups, until an 95 * available block is located. 96 * If no block preference is given the following heirarchy is used 97 * to allocate a block: 98 * 1) allocate a block in the cylinder group that contains the 99 * inode for the file. 100 * 2) quadradically rehash into other cylinder groups, until an 101 * available block is located. 102 */ 103 int 104 ffs_alloc(struct inode *ip, ufs_daddr_t lbn, ufs_daddr_t bpref, int size, 105 struct ucred *cred, ufs_daddr_t *bnp) 106 { 107 struct fs *fs; 108 ufs_daddr_t bno; 109 int cg; 110 #ifdef QUOTA 111 int error; 112 #endif 113 114 *bnp = 0; 115 fs = ip->i_fs; 116 #ifdef DIAGNOSTIC 117 if ((uint)size > fs->fs_bsize || fragoff(fs, size) != 0) { 118 kprintf("dev = %s, bsize = %ld, size = %d, fs = %s\n", 119 devtoname(ip->i_dev), (long)fs->fs_bsize, size, 120 fs->fs_fsmnt); 121 panic("ffs_alloc: bad size"); 122 } 123 if (cred == NOCRED) 124 panic("ffs_alloc: missing credential"); 125 #endif /* DIAGNOSTIC */ 126 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 127 goto nospace; 128 if (cred->cr_uid != 0 && 129 freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) 130 goto nospace; 131 #ifdef QUOTA 132 error = ufs_chkdq(ip, (long)btodb(size), cred, 0); 133 if (error) 134 return (error); 135 #endif 136 if (bpref >= fs->fs_size) 137 bpref = 0; 138 if (bpref == 0) 139 cg = ino_to_cg(fs, ip->i_number); 140 else 141 cg = dtog(fs, bpref); 142 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, 143 ffs_alloccg); 144 if (bno > 0) { 145 ip->i_blocks += btodb(size); 146 ip->i_flag |= IN_CHANGE | IN_UPDATE; 147 *bnp = bno; 148 return (0); 149 } 150 #ifdef QUOTA 151 /* 152 * Restore user's disk quota because allocation failed. 153 */ 154 (void) ufs_chkdq(ip, (long)-btodb(size), cred, FORCE); 155 #endif 156 nospace: 157 ffs_fserr(fs, cred->cr_uid, "filesystem full"); 158 uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); 159 return (ENOSPC); 160 } 161 162 /* 163 * Reallocate a fragment to a bigger size 164 * 165 * The number and size of the old block is given, and a preference 166 * and new size is also specified. The allocator attempts to extend 167 * the original block. Failing that, the regular block allocator is 168 * invoked to get an appropriate block. 169 */ 170 int 171 ffs_realloccg(struct inode *ip, ufs_daddr_t lbprev, ufs_daddr_t bpref, 172 int osize, int nsize, struct ucred *cred, struct buf **bpp) 173 { 174 struct fs *fs; 175 struct buf *bp; 176 int cg, request, error; 177 ufs_daddr_t bprev, bno; 178 179 *bpp = NULL; 180 fs = ip->i_fs; 181 #ifdef DIAGNOSTIC 182 if ((uint)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 183 (uint)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 184 kprintf( 185 "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n", 186 devtoname(ip->i_dev), (long)fs->fs_bsize, osize, 187 nsize, fs->fs_fsmnt); 188 panic("ffs_realloccg: bad size"); 189 } 190 if (cred == NOCRED) 191 panic("ffs_realloccg: missing credential"); 192 #endif /* DIAGNOSTIC */ 193 if (cred->cr_uid != 0 && 194 freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) 195 goto nospace; 196 if ((bprev = ip->i_db[lbprev]) == 0) { 197 kprintf("dev = %s, bsize = %ld, bprev = %ld, fs = %s\n", 198 devtoname(ip->i_dev), (long)fs->fs_bsize, (long)bprev, 199 fs->fs_fsmnt); 200 panic("ffs_realloccg: bad bprev"); 201 } 202 /* 203 * Allocate the extra space in the buffer. 204 */ 205 error = bread(ITOV(ip), lblktodoff(fs, lbprev), osize, &bp); 206 if (error) { 207 brelse(bp); 208 return (error); 209 } 210 211 if(bp->b_bio2.bio_offset == NOOFFSET) { 212 if (lbprev >= UFS_NDADDR) 213 panic("ffs_realloccg: lbprev out of range"); 214 bp->b_bio2.bio_offset = fsbtodoff(fs, bprev); 215 } 216 217 #ifdef QUOTA 218 error = ufs_chkdq(ip, (long)btodb(nsize - osize), cred, 0); 219 if (error) { 220 brelse(bp); 221 return (error); 222 } 223 #endif 224 /* 225 * Check for extension in the existing location. 226 */ 227 cg = dtog(fs, bprev); 228 bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize); 229 if (bno) { 230 if (bp->b_bio2.bio_offset != fsbtodoff(fs, bno)) 231 panic("ffs_realloccg: bad blockno"); 232 ip->i_blocks += btodb(nsize - osize); 233 ip->i_flag |= IN_CHANGE | IN_UPDATE; 234 allocbuf(bp, nsize); 235 bzero((char *)bp->b_data + osize, (uint)nsize - osize); 236 *bpp = bp; 237 return (0); 238 } 239 /* 240 * Allocate a new disk location. 241 */ 242 if (bpref >= fs->fs_size) 243 bpref = 0; 244 switch ((int)fs->fs_optim) { 245 case FS_OPTSPACE: 246 /* 247 * Allocate an exact sized fragment. Although this makes 248 * best use of space, we will waste time relocating it if 249 * the file continues to grow. If the fragmentation is 250 * less than half of the minimum free reserve, we choose 251 * to begin optimizing for time. 252 */ 253 request = nsize; 254 if (fs->fs_minfree <= 5 || 255 fs->fs_cstotal.cs_nffree > 256 (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100)) 257 break; 258 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", 259 fs->fs_fsmnt); 260 fs->fs_optim = FS_OPTTIME; 261 break; 262 case FS_OPTTIME: 263 /* 264 * At this point we have discovered a file that is trying to 265 * grow a small fragment to a larger fragment. To save time, 266 * we allocate a full sized block, then free the unused portion. 267 * If the file continues to grow, the `ffs_fragextend' call 268 * above will be able to grow it in place without further 269 * copying. If aberrant programs cause disk fragmentation to 270 * grow within 2% of the free reserve, we choose to begin 271 * optimizing for space. 272 */ 273 request = fs->fs_bsize; 274 if (fs->fs_cstotal.cs_nffree < 275 (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100) 276 break; 277 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", 278 fs->fs_fsmnt); 279 fs->fs_optim = FS_OPTSPACE; 280 break; 281 default: 282 kprintf("dev = %s, optim = %ld, fs = %s\n", 283 devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt); 284 panic("ffs_realloccg: bad optim"); 285 /* NOTREACHED */ 286 } 287 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, 288 ffs_alloccg); 289 if (bno > 0) { 290 bp->b_bio2.bio_offset = fsbtodoff(fs, bno); 291 if (!DOINGSOFTDEP(ITOV(ip))) 292 ffs_blkfree(ip, bprev, (long)osize); 293 if (nsize < request) 294 ffs_blkfree(ip, bno + numfrags(fs, nsize), 295 (long)(request - nsize)); 296 ip->i_blocks += btodb(nsize - osize); 297 ip->i_flag |= IN_CHANGE | IN_UPDATE; 298 allocbuf(bp, nsize); 299 bzero((char *)bp->b_data + osize, (uint)nsize - osize); 300 *bpp = bp; 301 return (0); 302 } 303 #ifdef QUOTA 304 /* 305 * Restore user's disk quota because allocation failed. 306 */ 307 (void) ufs_chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); 308 #endif 309 brelse(bp); 310 nospace: 311 /* 312 * no space available 313 */ 314 ffs_fserr(fs, cred->cr_uid, "filesystem full"); 315 uprintf("\n%s: write failed, filesystem is full\n", fs->fs_fsmnt); 316 return (ENOSPC); 317 } 318 319 SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); 320 321 /* 322 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 323 * 324 * The vnode and an array of buffer pointers for a range of sequential 325 * logical blocks to be made contiguous is given. The allocator attempts 326 * to find a range of sequential blocks starting as close as possible to 327 * an fs_rotdelay offset from the end of the allocation for the logical 328 * block immediately preceeding the current range. If successful, the 329 * physical block numbers in the buffer pointers and in the inode are 330 * changed to reflect the new allocation. If unsuccessful, the allocation 331 * is left unchanged. The success in doing the reallocation is returned. 332 * Note that the error return is not reflected back to the user. Rather 333 * the previous block allocation will be used. 334 */ 335 static int doasyncfree = 1; 336 SYSCTL_INT(_vfs_ffs, FFS_ASYNCFREE, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); 337 338 static int doreallocblks = 1; 339 SYSCTL_INT(_vfs_ffs, FFS_REALLOCBLKS, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 340 341 #ifdef DEBUG 342 static volatile int prtrealloc = 0; 343 #endif 344 345 /* 346 * ffs_reallocblks(struct vnode *a_vp, struct cluster_save *a_buflist) 347 */ 348 int 349 ffs_reallocblks(struct vop_reallocblks_args *ap) 350 { 351 struct fs *fs; 352 struct inode *ip; 353 struct vnode *vp; 354 struct buf *sbp, *ebp; 355 ufs_daddr_t *bap, *sbap, *ebap = NULL; 356 struct cluster_save *buflist; 357 ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno; 358 #ifdef DIAGNOSTIC 359 off_t boffset; 360 #endif 361 struct indir start_ap[UFS_NIADDR + 1], end_ap[UFS_NIADDR + 1], *idp; 362 int i, len, slen, start_lvl, end_lvl, pref, ssize; 363 364 if (doreallocblks == 0) 365 return (ENOSPC); 366 vp = ap->a_vp; 367 ip = VTOI(vp); 368 fs = ip->i_fs; 369 if (fs->fs_contigsumsize <= 0) 370 return (ENOSPC); 371 buflist = ap->a_buflist; 372 len = buflist->bs_nchildren; 373 start_lbn = lblkno(fs, buflist->bs_children[0]->b_loffset); 374 end_lbn = start_lbn + len - 1; 375 #ifdef DIAGNOSTIC 376 for (i = 0; i < len; i++) 377 if (!ffs_checkblk(ip, 378 dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), fs->fs_bsize)) 379 panic("ffs_reallocblks: unallocated block 1"); 380 for (i = 1; i < len; i++) { 381 if (buflist->bs_children[i]->b_loffset != lblktodoff(fs, start_lbn) + lblktodoff(fs, i)) 382 panic("ffs_reallocblks: non-logical cluster"); 383 } 384 boffset = buflist->bs_children[0]->b_bio2.bio_offset; 385 ssize = (int)fsbtodoff(fs, fs->fs_frag); 386 for (i = 1; i < len - 1; i++) 387 if (buflist->bs_children[i]->b_bio2.bio_offset != boffset + (i * ssize)) 388 panic("ffs_reallocblks: non-physical cluster %d", i); 389 #endif 390 /* 391 * If the latest allocation is in a new cylinder group, assume that 392 * the filesystem has decided to move and do not force it back to 393 * the previous cylinder group. 394 */ 395 if (dtog(fs, dofftofsb(fs, buflist->bs_children[0]->b_bio2.bio_offset)) != 396 dtog(fs, dofftofsb(fs, buflist->bs_children[len - 1]->b_bio2.bio_offset))) 397 return (ENOSPC); 398 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 399 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 400 return (ENOSPC); 401 /* 402 * Get the starting offset and block map for the first block and 403 * the number of blocks that will fit into sbap starting at soff. 404 */ 405 if (start_lvl == 0) { 406 sbap = &ip->i_db[0]; 407 soff = start_lbn; 408 slen = UFS_NDADDR - soff; 409 } else { 410 idp = &start_ap[start_lvl - 1]; 411 if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->fs_bsize, &sbp)) { 412 brelse(sbp); 413 return (ENOSPC); 414 } 415 sbap = (ufs_daddr_t *)sbp->b_data; 416 soff = idp->in_off; 417 slen = fs->fs_nindir - soff; 418 } 419 /* 420 * Find the preferred location for the cluster. 421 */ 422 pref = ffs_blkpref(ip, start_lbn, soff, sbap); 423 424 /* 425 * If the block range spans two block maps, get the second map. 426 */ 427 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 428 ssize = len; 429 } else { 430 #ifdef DIAGNOSTIC 431 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 432 panic("ffs_reallocblk: start == end"); 433 #endif 434 ssize = len - (idp->in_off + 1); 435 if (bread(vp, lblktodoff(fs, idp->in_lbn), (int)fs->fs_bsize, &ebp)) 436 goto fail; 437 ebap = (ufs_daddr_t *)ebp->b_data; 438 } 439 440 /* 441 * Make sure we aren't spanning more then two blockmaps. ssize is 442 * our calculation of the span we have to scan in the first blockmap, 443 * while slen is our calculation of the number of entries available 444 * in the first blockmap (from soff). 445 */ 446 if (ssize > slen) { 447 panic("ffs_reallocblks: range spans more than two blockmaps!" 448 " start_lbn %ld len %d (%d/%d)", 449 (long)start_lbn, len, slen, ssize); 450 } 451 /* 452 * Search the block map looking for an allocation of the desired size. 453 */ 454 if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, 455 len, ffs_clusteralloc)) == 0) 456 goto fail; 457 /* 458 * We have found a new contiguous block. 459 * 460 * First we have to replace the old block pointers with the new 461 * block pointers in the inode and indirect blocks associated 462 * with the file. 463 */ 464 #ifdef DEBUG 465 if (prtrealloc) 466 kprintf("realloc: ino %ju, lbns %d-%d\n\told:", 467 (uintmax_t)ip->i_number, start_lbn, end_lbn); 468 #endif 469 blkno = newblk; 470 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { 471 if (i == ssize) { 472 bap = ebap; 473 soff = -i; 474 } 475 #ifdef DIAGNOSTIC 476 if (!ffs_checkblk(ip, 477 dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), fs->fs_bsize)) 478 panic("ffs_reallocblks: unallocated block 2"); 479 if (dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset) != *bap) 480 panic("ffs_reallocblks: alloc mismatch"); 481 #endif 482 #ifdef DEBUG 483 if (prtrealloc) 484 kprintf(" %d,", *bap); 485 #endif 486 if (DOINGSOFTDEP(vp)) { 487 if (sbap == &ip->i_db[0] && i < ssize) 488 softdep_setup_allocdirect(ip, start_lbn + i, 489 blkno, *bap, fs->fs_bsize, fs->fs_bsize, 490 buflist->bs_children[i]); 491 else 492 softdep_setup_allocindir_page(ip, start_lbn + i, 493 i < ssize ? sbp : ebp, soff + i, blkno, 494 *bap, buflist->bs_children[i]); 495 } 496 *bap++ = blkno; 497 } 498 /* 499 * Next we must write out the modified inode and indirect blocks. 500 * For strict correctness, the writes should be synchronous since 501 * the old block values may have been written to disk. In practise 502 * they are almost never written, but if we are concerned about 503 * strict correctness, the `doasyncfree' flag should be set to zero. 504 * 505 * The test on `doasyncfree' should be changed to test a flag 506 * that shows whether the associated buffers and inodes have 507 * been written. The flag should be set when the cluster is 508 * started and cleared whenever the buffer or inode is flushed. 509 * We can then check below to see if it is set, and do the 510 * synchronous write only when it has been cleared. 511 */ 512 if (sbap != &ip->i_db[0]) { 513 if (doasyncfree) 514 bdwrite(sbp); 515 else 516 bwrite(sbp); 517 } else { 518 ip->i_flag |= IN_CHANGE | IN_UPDATE; 519 if (!doasyncfree) 520 ffs_update(vp, 1); 521 } 522 if (ssize < len) { 523 if (doasyncfree) 524 bdwrite(ebp); 525 else 526 bwrite(ebp); 527 } 528 /* 529 * Last, free the old blocks and assign the new blocks to the buffers. 530 */ 531 #ifdef DEBUG 532 if (prtrealloc) 533 kprintf("\n\tnew:"); 534 #endif 535 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { 536 if (!DOINGSOFTDEP(vp) && 537 buflist->bs_children[i]->b_bio2.bio_offset != NOOFFSET) { 538 ffs_blkfree(ip, 539 dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), 540 fs->fs_bsize); 541 } 542 buflist->bs_children[i]->b_bio2.bio_offset = fsbtodoff(fs, blkno); 543 #ifdef DIAGNOSTIC 544 if (!ffs_checkblk(ip, 545 dofftofsb(fs, buflist->bs_children[i]->b_bio2.bio_offset), fs->fs_bsize)) 546 panic("ffs_reallocblks: unallocated block 3"); 547 #endif 548 #ifdef DEBUG 549 if (prtrealloc) 550 kprintf(" %d,", blkno); 551 #endif 552 } 553 #ifdef DEBUG 554 if (prtrealloc) { 555 prtrealloc--; 556 kprintf("\n"); 557 } 558 #endif 559 return (0); 560 561 fail: 562 if (ssize < len) 563 brelse(ebp); 564 if (sbap != &ip->i_db[0]) 565 brelse(sbp); 566 return (ENOSPC); 567 } 568 569 /* 570 * Allocate an inode in the filesystem. 571 * 572 * If allocating a directory, use ffs_dirpref to select the inode. 573 * If allocating in a directory, the following hierarchy is followed: 574 * 1) allocate the preferred inode. 575 * 2) allocate an inode in the same cylinder group. 576 * 3) quadradically rehash into other cylinder groups, until an 577 * available inode is located. 578 * If no inode preference is given the following heirarchy is used 579 * to allocate an inode: 580 * 1) allocate an inode in cylinder group 0. 581 * 2) quadradically rehash into other cylinder groups, until an 582 * available inode is located. 583 */ 584 int 585 ffs_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) 586 { 587 struct inode *pip; 588 struct fs *fs; 589 struct inode *ip; 590 ino_t ino, ipref; 591 int cg, error; 592 593 *vpp = NULL; 594 pip = VTOI(pvp); 595 fs = pip->i_fs; 596 if (fs->fs_cstotal.cs_nifree == 0) 597 goto noinodes; 598 599 if ((mode & IFMT) == IFDIR) 600 ipref = ffs_dirpref(pip); 601 else 602 ipref = pip->i_number; 603 if (ipref >= fs->fs_ncg * fs->fs_ipg) 604 ipref = 0; 605 cg = ino_to_cg(fs, ipref); 606 /* 607 * Track number of dirs created one after another 608 * in a same cg without intervening by files. 609 */ 610 if ((mode & IFMT) == IFDIR) { 611 if (fs->fs_contigdirs[cg] < 255) 612 fs->fs_contigdirs[cg]++; 613 } else { 614 if (fs->fs_contigdirs[cg] > 0) 615 fs->fs_contigdirs[cg]--; 616 } 617 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, 618 (allocfcn_t *)ffs_nodealloccg); 619 if (ino == 0) 620 goto noinodes; 621 error = VFS_VGET(pvp->v_mount, NULL, ino, vpp); 622 if (error) { 623 ffs_vfree(pvp, ino, mode); 624 return (error); 625 } 626 ip = VTOI(*vpp); 627 if (ip->i_mode) { 628 kprintf("mode = 0%o, inum = %lu, fs = %s\n", 629 ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt); 630 panic("ffs_valloc: dup alloc"); 631 } 632 if (ip->i_blocks) { /* XXX */ 633 kprintf("free inode %s/%lu had %ld blocks\n", 634 fs->fs_fsmnt, (u_long)ino, (long)ip->i_blocks); 635 ip->i_blocks = 0; 636 } 637 ip->i_flags = 0; 638 /* 639 * Set up a new generation number for this inode. 640 */ 641 if (ip->i_gen == 0 || ++ip->i_gen == 0) 642 ip->i_gen = krandom() / 2 + 1; 643 return (0); 644 noinodes: 645 ffs_fserr(fs, cred->cr_uid, "out of inodes"); 646 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); 647 return (ENOSPC); 648 } 649 650 /* 651 * Find a cylinder group to place a directory. 652 * 653 * The policy implemented by this algorithm is to allocate a 654 * directory inode in the same cylinder group as its parent 655 * directory, but also to reserve space for its files inodes 656 * and data. Restrict the number of directories which may be 657 * allocated one after another in the same cylinder group 658 * without intervening allocation of files. 659 * 660 * If we allocate a first level directory then force allocation 661 * in another cylinder group. 662 */ 663 static ino_t 664 ffs_dirpref(struct inode *pip) 665 { 666 struct fs *fs; 667 int cg, prefcg, dirsize, cgsize; 668 int64_t dirsize64; 669 int avgifree, avgbfree, avgndir, curdirsize; 670 int minifree, minbfree, maxndir; 671 int mincg, minndir; 672 int maxcontigdirs; 673 674 fs = pip->i_fs; 675 676 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; 677 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 678 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; 679 680 /* 681 * Force allocation in another cg if creating a first level dir. 682 */ 683 if (ITOV(pip)->v_flag & VROOT) { 684 prefcg = karc4random() % fs->fs_ncg; 685 mincg = prefcg; 686 minndir = fs->fs_ipg; 687 for (cg = prefcg; cg < fs->fs_ncg; cg++) 688 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 689 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 690 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 691 mincg = cg; 692 minndir = fs->fs_cs(fs, cg).cs_ndir; 693 } 694 for (cg = 0; cg < prefcg; cg++) 695 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 696 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 697 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 698 mincg = cg; 699 minndir = fs->fs_cs(fs, cg).cs_ndir; 700 } 701 return ((ino_t)(fs->fs_ipg * mincg)); 702 } 703 704 /* 705 * Count various limits which used for 706 * optimal allocation of a directory inode. 707 */ 708 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); 709 minifree = avgifree - avgifree / 4; 710 if (minifree < 1) 711 minifree = 1; 712 minbfree = avgbfree - avgbfree / 4; 713 if (minbfree < 1) 714 minbfree = 1; 715 cgsize = fs->fs_fsize * fs->fs_fpg; 716 717 /* 718 * fs_avgfilesize and fs_avgfpdir are user-settable entities and 719 * multiplying them may overflow a 32 bit integer. 720 */ 721 dirsize64 = fs->fs_avgfilesize * (int64_t)fs->fs_avgfpdir; 722 if (dirsize64 > 0x7fffffff) { 723 maxcontigdirs = 1; 724 } else { 725 dirsize = (int)dirsize64; 726 curdirsize = avgndir ? 727 (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0; 728 if (dirsize < curdirsize) 729 dirsize = curdirsize; 730 maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255); 731 if (fs->fs_avgfpdir > 0) 732 maxcontigdirs = min(maxcontigdirs, 733 fs->fs_ipg / fs->fs_avgfpdir); 734 if (maxcontigdirs == 0) 735 maxcontigdirs = 1; 736 } 737 738 /* 739 * Limit number of dirs in one cg and reserve space for 740 * regular files, but only if we have no deficit in 741 * inodes or space. 742 */ 743 prefcg = ino_to_cg(fs, pip->i_number); 744 for (cg = prefcg; cg < fs->fs_ncg; cg++) 745 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 746 fs->fs_cs(fs, cg).cs_nifree >= minifree && 747 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 748 if (fs->fs_contigdirs[cg] < maxcontigdirs) 749 return ((ino_t)(fs->fs_ipg * cg)); 750 } 751 for (cg = 0; cg < prefcg; cg++) 752 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 753 fs->fs_cs(fs, cg).cs_nifree >= minifree && 754 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 755 if (fs->fs_contigdirs[cg] < maxcontigdirs) 756 return ((ino_t)(fs->fs_ipg * cg)); 757 } 758 /* 759 * This is a backstop when we have deficit in space. 760 */ 761 for (cg = prefcg; cg < fs->fs_ncg; cg++) 762 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 763 return ((ino_t)(fs->fs_ipg * cg)); 764 for (cg = 0; cg < prefcg; cg++) 765 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 766 break; 767 return ((ino_t)(fs->fs_ipg * cg)); 768 } 769 770 /* 771 * Select the desired position for the next block in a file. The file is 772 * logically divided into sections. The first section is composed of the 773 * direct blocks. Each additional section contains fs_maxbpg blocks. 774 * 775 * If no blocks have been allocated in the first section, the policy is to 776 * request a block in the same cylinder group as the inode that describes 777 * the file. If no blocks have been allocated in any other section, the 778 * policy is to place the section in a cylinder group with a greater than 779 * average number of free blocks. An appropriate cylinder group is found 780 * by using a rotor that sweeps the cylinder groups. When a new group of 781 * blocks is needed, the sweep begins in the cylinder group following the 782 * cylinder group from which the previous allocation was made. The sweep 783 * continues until a cylinder group with greater than the average number 784 * of free blocks is found. If the allocation is for the first block in an 785 * indirect block, the information on the previous allocation is unavailable; 786 * here a best guess is made based upon the logical block number being 787 * allocated. 788 * 789 * If a section is already partially allocated, the policy is to 790 * contiguously allocate fs_maxcontig blocks. The end of one of these 791 * contiguous blocks and the beginning of the next is physically separated 792 * so that the disk head will be in transit between them for at least 793 * fs_rotdelay milliseconds. This is to allow time for the processor to 794 * schedule another I/O transfer. 795 */ 796 ufs_daddr_t 797 ffs_blkpref(struct inode *ip, ufs_daddr_t lbn, int indx, ufs_daddr_t *bap) 798 { 799 struct fs *fs; 800 int cg; 801 int avgbfree, startcg; 802 ufs_daddr_t nextblk; 803 804 fs = ip->i_fs; 805 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 806 if (lbn < UFS_NDADDR + NINDIR(fs)) { 807 cg = ino_to_cg(fs, ip->i_number); 808 return (fs->fs_fpg * cg + fs->fs_frag); 809 } 810 /* 811 * Find a cylinder with greater than average number of 812 * unused data blocks. 813 */ 814 if (indx == 0 || bap[indx - 1] == 0) 815 startcg = 816 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 817 else 818 startcg = dtog(fs, bap[indx - 1]) + 1; 819 startcg %= fs->fs_ncg; 820 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 821 for (cg = startcg; cg < fs->fs_ncg; cg++) 822 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 823 fs->fs_cgrotor = cg; 824 return (fs->fs_fpg * cg + fs->fs_frag); 825 } 826 for (cg = 0; cg <= startcg; cg++) 827 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 828 fs->fs_cgrotor = cg; 829 return (fs->fs_fpg * cg + fs->fs_frag); 830 } 831 return (0); 832 } 833 /* 834 * One or more previous blocks have been laid out. If less 835 * than fs_maxcontig previous blocks are contiguous, the 836 * next block is requested contiguously, otherwise it is 837 * requested rotationally delayed by fs_rotdelay milliseconds. 838 */ 839 nextblk = bap[indx - 1] + fs->fs_frag; 840 if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig || 841 bap[indx - fs->fs_maxcontig] + 842 blkstofrags(fs, fs->fs_maxcontig) != nextblk) 843 return (nextblk); 844 /* 845 * Here we convert ms of delay to frags as: 846 * (frags) = (ms) * (rev/sec) * (sect/rev) / 847 * ((sect/frag) * (ms/sec)) 848 * then round up to the next block. 849 */ 850 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / 851 (NSPF(fs) * 1000), fs->fs_frag); 852 return (nextblk); 853 } 854 855 /* 856 * Implement the cylinder overflow algorithm. 857 * 858 * The policy implemented by this algorithm is: 859 * 1) allocate the block in its requested cylinder group. 860 * 2) quadradically rehash on the cylinder group number. 861 * 3) brute force search for a free block. 862 */ 863 /*VARARGS5*/ 864 static u_long 865 ffs_hashalloc(struct inode *ip, int cg, long pref, 866 int size, /* size for data blocks, mode for inodes */ 867 allocfcn_t *allocator) 868 { 869 struct fs *fs; 870 long result; /* XXX why not same type as we return? */ 871 int i, icg = cg; 872 873 fs = ip->i_fs; 874 /* 875 * 1: preferred cylinder group 876 */ 877 result = (*allocator)(ip, cg, pref, size); 878 if (result) 879 return (result); 880 /* 881 * 2: quadratic rehash 882 */ 883 for (i = 1; i < fs->fs_ncg; i *= 2) { 884 cg += i; 885 if (cg >= fs->fs_ncg) 886 cg -= fs->fs_ncg; 887 result = (*allocator)(ip, cg, 0, size); 888 if (result) 889 return (result); 890 } 891 /* 892 * 3: brute force search 893 * Note that we start at i == 2, since 0 was checked initially, 894 * and 1 is always checked in the quadratic rehash. 895 */ 896 cg = (icg + 2) % fs->fs_ncg; 897 for (i = 2; i < fs->fs_ncg; i++) { 898 result = (*allocator)(ip, cg, 0, size); 899 if (result) 900 return (result); 901 cg++; 902 if (cg == fs->fs_ncg) 903 cg = 0; 904 } 905 return (0); 906 } 907 908 /* 909 * Determine whether a fragment can be extended. 910 * 911 * Check to see if the necessary fragments are available, and 912 * if they are, allocate them. 913 */ 914 static ufs_daddr_t 915 ffs_fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize) 916 { 917 struct fs *fs; 918 struct cg *cgp; 919 struct buf *bp; 920 long bno; 921 int frags, bbase; 922 int i, error; 923 uint8_t *blksfree; 924 925 fs = ip->i_fs; 926 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) 927 return (0); 928 frags = numfrags(fs, nsize); 929 bbase = fragnum(fs, bprev); 930 if (bbase > fragnum(fs, (bprev + frags - 1))) { 931 /* cannot extend across a block boundary */ 932 return (0); 933 } 934 KKASSERT(blknum(fs, bprev) == blknum(fs, bprev + frags - 1)); 935 error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), 936 (int)fs->fs_cgsize, &bp); 937 if (error) { 938 brelse(bp); 939 return (0); 940 } 941 cgp = (struct cg *)bp->b_data; 942 if (!cg_chkmagic(cgp)) { 943 brelse(bp); 944 return (0); 945 } 946 cgp->cg_time = time_second; 947 bno = dtogd(fs, bprev); 948 blksfree = cg_blksfree(cgp); 949 for (i = numfrags(fs, osize); i < frags; i++) { 950 if (isclr(blksfree, bno + i)) { 951 brelse(bp); 952 return (0); 953 } 954 } 955 956 /* 957 * the current fragment can be extended 958 * deduct the count on fragment being extended into 959 * increase the count on the remaining fragment (if any) 960 * allocate the extended piece 961 * 962 * ---oooooooooonnnnnnn111---- 963 * [-----frags-----] 964 * ^ ^ 965 * bbase fs_frag 966 */ 967 for (i = frags; i < fs->fs_frag - bbase; i++) { 968 if (isclr(blksfree, bno + i)) 969 break; 970 } 971 972 /* 973 * Size of original free frag is [i - numfrags(fs, osize)] 974 * Size of remaining free frag is [i - frags] 975 */ 976 cgp->cg_frsum[i - numfrags(fs, osize)]--; 977 if (i != frags) 978 cgp->cg_frsum[i - frags]++; 979 for (i = numfrags(fs, osize); i < frags; i++) { 980 clrbit(blksfree, bno + i); 981 cgp->cg_cs.cs_nffree--; 982 fs->fs_cstotal.cs_nffree--; 983 fs->fs_cs(fs, cg).cs_nffree--; 984 } 985 fs->fs_fmod = 1; 986 if (DOINGSOFTDEP(ITOV(ip))) 987 softdep_setup_blkmapdep(bp, fs, bprev); 988 bdwrite(bp); 989 return (bprev); 990 } 991 992 /* 993 * Determine whether a block can be allocated. 994 * 995 * Check to see if a block of the appropriate size is available, 996 * and if it is, allocate it. 997 */ 998 static ufs_daddr_t 999 ffs_alloccg(struct inode *ip, int cg, ufs_daddr_t bpref, int size) 1000 { 1001 struct fs *fs; 1002 struct cg *cgp; 1003 struct buf *bp; 1004 int i; 1005 ufs_daddr_t bno, blkno; 1006 int allocsiz, error, frags; 1007 uint8_t *blksfree; 1008 1009 fs = ip->i_fs; 1010 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) 1011 return (0); 1012 error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), 1013 (int)fs->fs_cgsize, &bp); 1014 if (error) { 1015 brelse(bp); 1016 return (0); 1017 } 1018 cgp = (struct cg *)bp->b_data; 1019 if (!cg_chkmagic(cgp) || 1020 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { 1021 brelse(bp); 1022 return (0); 1023 } 1024 cgp->cg_time = time_second; 1025 if (size == fs->fs_bsize) { 1026 bno = ffs_alloccgblk(ip, bp, bpref); 1027 bdwrite(bp); 1028 return (bno); 1029 } 1030 /* 1031 * Check to see if any fragments of sufficient size are already 1032 * available. Fit the data into a larger fragment if necessary, 1033 * before allocating a whole new block. 1034 */ 1035 blksfree = cg_blksfree(cgp); 1036 frags = numfrags(fs, size); 1037 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) { 1038 if (cgp->cg_frsum[allocsiz] != 0) 1039 break; 1040 } 1041 if (allocsiz == fs->fs_frag) { 1042 /* 1043 * No fragments were available, allocate a whole block and 1044 * cut the requested fragment (of size frags) out of it. 1045 */ 1046 if (cgp->cg_cs.cs_nbfree == 0) { 1047 brelse(bp); 1048 return (0); 1049 } 1050 bno = ffs_alloccgblk(ip, bp, bpref); 1051 bpref = dtogd(fs, bno); 1052 for (i = frags; i < fs->fs_frag; i++) 1053 setbit(blksfree, bpref + i); 1054 1055 /* 1056 * Calculate the number of free frags still remaining after 1057 * we have cut out the requested allocation. Indicate that 1058 * a fragment of that size is now available for future 1059 * allocation. 1060 */ 1061 i = fs->fs_frag - frags; 1062 cgp->cg_cs.cs_nffree += i; 1063 fs->fs_cstotal.cs_nffree += i; 1064 fs->fs_cs(fs, cg).cs_nffree += i; 1065 fs->fs_fmod = 1; 1066 cgp->cg_frsum[i]++; 1067 bdwrite(bp); 1068 return (bno); 1069 } 1070 1071 /* 1072 * cg_frsum[] has told us that a free fragment of allocsiz size is 1073 * available. Find it, then clear the bitmap bits associated with 1074 * the size we want. 1075 */ 1076 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); 1077 if (bno < 0) { 1078 brelse(bp); 1079 return (0); 1080 } 1081 for (i = 0; i < frags; i++) 1082 clrbit(blksfree, bno + i); 1083 cgp->cg_cs.cs_nffree -= frags; 1084 fs->fs_cstotal.cs_nffree -= frags; 1085 fs->fs_cs(fs, cg).cs_nffree -= frags; 1086 fs->fs_fmod = 1; 1087 1088 /* 1089 * Account for the allocation. The original searched size that we 1090 * found is no longer available. If we cut out a smaller piece then 1091 * a smaller fragment is now available. 1092 */ 1093 cgp->cg_frsum[allocsiz]--; 1094 if (frags != allocsiz) 1095 cgp->cg_frsum[allocsiz - frags]++; 1096 blkno = cg * fs->fs_fpg + bno; 1097 if (DOINGSOFTDEP(ITOV(ip))) 1098 softdep_setup_blkmapdep(bp, fs, blkno); 1099 bdwrite(bp); 1100 return ((u_long)blkno); 1101 } 1102 1103 /* 1104 * Allocate a block in a cylinder group. 1105 * 1106 * This algorithm implements the following policy: 1107 * 1) allocate the requested block. 1108 * 2) allocate a rotationally optimal block in the same cylinder. 1109 * 3) allocate the next available block on the block rotor for the 1110 * specified cylinder group. 1111 * Note that this routine only allocates fs_bsize blocks; these 1112 * blocks may be fragmented by the routine that allocates them. 1113 */ 1114 static ufs_daddr_t 1115 ffs_alloccgblk(struct inode *ip, struct buf *bp, ufs_daddr_t bpref) 1116 { 1117 struct fs *fs; 1118 struct cg *cgp; 1119 ufs_daddr_t bno, blkno; 1120 int cylno, pos, delta; 1121 short *cylbp; 1122 int i; 1123 uint8_t *blksfree; 1124 1125 fs = ip->i_fs; 1126 cgp = (struct cg *)bp->b_data; 1127 blksfree = cg_blksfree(cgp); 1128 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { 1129 bpref = cgp->cg_rotor; 1130 goto norot; 1131 } 1132 bpref = blknum(fs, bpref); 1133 bpref = dtogd(fs, bpref); 1134 /* 1135 * if the requested block is available, use it 1136 */ 1137 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bpref))) { 1138 bno = bpref; 1139 goto gotit; 1140 } 1141 if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) { 1142 /* 1143 * Block layout information is not available. 1144 * Leaving bpref unchanged means we take the 1145 * next available free block following the one 1146 * we just allocated. Hopefully this will at 1147 * least hit a track cache on drives of unknown 1148 * geometry (e.g. SCSI). 1149 */ 1150 goto norot; 1151 } 1152 /* 1153 * check for a block available on the same cylinder 1154 */ 1155 cylno = cbtocylno(fs, bpref); 1156 if (cg_blktot(cgp)[cylno] == 0) 1157 goto norot; 1158 /* 1159 * check the summary information to see if a block is 1160 * available in the requested cylinder starting at the 1161 * requested rotational position and proceeding around. 1162 */ 1163 cylbp = cg_blks(fs, cgp, cylno); 1164 pos = cbtorpos(fs, bpref); 1165 for (i = pos; i < fs->fs_nrpos; i++) 1166 if (cylbp[i] > 0) 1167 break; 1168 if (i == fs->fs_nrpos) 1169 for (i = 0; i < pos; i++) 1170 if (cylbp[i] > 0) 1171 break; 1172 if (cylbp[i] > 0) { 1173 /* 1174 * found a rotational position, now find the actual 1175 * block. A panic if none is actually there. 1176 */ 1177 pos = cylno % fs->fs_cpc; 1178 bno = (cylno - pos) * fs->fs_spc / NSPB(fs); 1179 if (fs_postbl(fs, pos)[i] == -1) { 1180 kprintf("pos = %d, i = %d, fs = %s\n", 1181 pos, i, fs->fs_fsmnt); 1182 panic("ffs_alloccgblk: cyl groups corrupted"); 1183 } 1184 for (i = fs_postbl(fs, pos)[i];; ) { 1185 if (ffs_isblock(fs, blksfree, bno + i)) { 1186 bno = blkstofrags(fs, (bno + i)); 1187 goto gotit; 1188 } 1189 delta = fs_rotbl(fs)[i]; 1190 if (delta <= 0 || 1191 delta + i > fragstoblks(fs, fs->fs_fpg)) 1192 break; 1193 i += delta; 1194 } 1195 kprintf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); 1196 panic("ffs_alloccgblk: can't find blk in cyl"); 1197 } 1198 norot: 1199 /* 1200 * no blocks in the requested cylinder, so take next 1201 * available one in this cylinder group. 1202 */ 1203 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); 1204 if (bno < 0) 1205 return (0); 1206 cgp->cg_rotor = bno; 1207 gotit: 1208 blkno = fragstoblks(fs, bno); 1209 ffs_clrblock(fs, blksfree, (long)blkno); 1210 ffs_clusteracct(fs, cgp, blkno, -1); 1211 cgp->cg_cs.cs_nbfree--; 1212 fs->fs_cstotal.cs_nbfree--; 1213 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; 1214 cylno = cbtocylno(fs, bno); 1215 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; 1216 cg_blktot(cgp)[cylno]--; 1217 fs->fs_fmod = 1; 1218 blkno = cgp->cg_cgx * fs->fs_fpg + bno; 1219 if (DOINGSOFTDEP(ITOV(ip))) 1220 softdep_setup_blkmapdep(bp, fs, blkno); 1221 return (blkno); 1222 } 1223 1224 /* 1225 * Determine whether a cluster can be allocated. 1226 * 1227 * We do not currently check for optimal rotational layout if there 1228 * are multiple choices in the same cylinder group. Instead we just 1229 * take the first one that we find following bpref. 1230 */ 1231 static ufs_daddr_t 1232 ffs_clusteralloc(struct inode *ip, int cg, ufs_daddr_t bpref, int len) 1233 { 1234 struct fs *fs; 1235 struct cg *cgp; 1236 struct buf *bp; 1237 int i, got, run, bno, bit, map; 1238 u_char *mapp; 1239 int32_t *lp; 1240 uint8_t *blksfree; 1241 1242 fs = ip->i_fs; 1243 if (fs->fs_maxcluster[cg] < len) 1244 return (0); 1245 if (bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), 1246 (int)fs->fs_cgsize, &bp)) { 1247 goto fail; 1248 } 1249 cgp = (struct cg *)bp->b_data; 1250 if (!cg_chkmagic(cgp)) 1251 goto fail; 1252 1253 /* 1254 * Check to see if a cluster of the needed size (or bigger) is 1255 * available in this cylinder group. 1256 */ 1257 lp = &cg_clustersum(cgp)[len]; 1258 for (i = len; i <= fs->fs_contigsumsize; i++) 1259 if (*lp++ > 0) 1260 break; 1261 if (i > fs->fs_contigsumsize) { 1262 /* 1263 * This is the first time looking for a cluster in this 1264 * cylinder group. Update the cluster summary information 1265 * to reflect the true maximum sized cluster so that 1266 * future cluster allocation requests can avoid reading 1267 * the cylinder group map only to find no clusters. 1268 */ 1269 lp = &cg_clustersum(cgp)[len - 1]; 1270 for (i = len - 1; i > 0; i--) 1271 if (*lp-- > 0) 1272 break; 1273 fs->fs_maxcluster[cg] = i; 1274 goto fail; 1275 } 1276 /* 1277 * Search the cluster map to find a big enough cluster. 1278 * We take the first one that we find, even if it is larger 1279 * than we need as we prefer to get one close to the previous 1280 * block allocation. We do not search before the current 1281 * preference point as we do not want to allocate a block 1282 * that is allocated before the previous one (as we will 1283 * then have to wait for another pass of the elevator 1284 * algorithm before it will be read). We prefer to fail and 1285 * be recalled to try an allocation in the next cylinder group. 1286 */ 1287 if (dtog(fs, bpref) != cg) 1288 bpref = 0; 1289 else 1290 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); 1291 mapp = &cg_clustersfree(cgp)[bpref / NBBY]; 1292 map = *mapp++; 1293 bit = 1 << (bpref % NBBY); 1294 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { 1295 if ((map & bit) == 0) { 1296 run = 0; 1297 } else { 1298 run++; 1299 if (run == len) 1300 break; 1301 } 1302 if ((got & (NBBY - 1)) != (NBBY - 1)) { 1303 bit <<= 1; 1304 } else { 1305 map = *mapp++; 1306 bit = 1; 1307 } 1308 } 1309 if (got >= cgp->cg_nclusterblks) 1310 goto fail; 1311 /* 1312 * Allocate the cluster that we have found. 1313 */ 1314 blksfree = cg_blksfree(cgp); 1315 for (i = 1; i <= len; i++) { 1316 if (!ffs_isblock(fs, blksfree, got - run + i)) 1317 panic("ffs_clusteralloc: map mismatch"); 1318 } 1319 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); 1320 if (dtog(fs, bno) != cg) 1321 panic("ffs_clusteralloc: allocated out of group"); 1322 len = blkstofrags(fs, len); 1323 for (i = 0; i < len; i += fs->fs_frag) { 1324 if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) 1325 panic("ffs_clusteralloc: lost block"); 1326 } 1327 bdwrite(bp); 1328 return (bno); 1329 1330 fail: 1331 brelse(bp); 1332 return (0); 1333 } 1334 1335 /* 1336 * Determine whether an inode can be allocated. 1337 * 1338 * Check to see if an inode is available, and if it is, 1339 * allocate it using the following policy: 1340 * 1) allocate the requested inode. 1341 * 2) allocate the next available inode after the requested 1342 * inode in the specified cylinder group. 1343 * 3) the inode must not already be in the inode hash table. We 1344 * can encounter such a case because the vnode reclamation sequence 1345 * frees the bit 1346 * 3) the inode must not already be in the inode hash, otherwise it 1347 * may be in the process of being deallocated. This can occur 1348 * because the bitmap is updated before the inode is removed from 1349 * hash. If we were to reallocate the inode the caller could wind 1350 * up returning a vnode/inode combination which is in an indeterminate 1351 * state. 1352 */ 1353 static ino_t 1354 ffs_nodealloccg(struct inode *ip, int cg, ufs_daddr_t ipref, int mode) 1355 { 1356 struct ufsmount *ump; 1357 struct fs *fs; 1358 struct cg *cgp; 1359 struct buf *bp; 1360 uint8_t *inosused; 1361 uint8_t map; 1362 int error, len, arraysize, i; 1363 int icheckmiss; 1364 ufs_daddr_t ibase; 1365 struct vnode *vp; 1366 1367 vp = ITOV(ip); 1368 ump = VFSTOUFS(vp->v_mount); 1369 fs = ip->i_fs; 1370 if (fs->fs_cs(fs, cg).cs_nifree == 0) 1371 return (0); 1372 error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), 1373 (int)fs->fs_cgsize, &bp); 1374 if (error) { 1375 brelse(bp); 1376 return (0); 1377 } 1378 cgp = (struct cg *)bp->b_data; 1379 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { 1380 brelse(bp); 1381 return (0); 1382 } 1383 inosused = cg_inosused(cgp); 1384 icheckmiss = 0; 1385 1386 /* 1387 * Quick check, reuse the most recently free inode or continue 1388 * a scan from where we left off the last time. 1389 */ 1390 ibase = cg * fs->fs_ipg; 1391 if (ipref) { 1392 ipref %= fs->fs_ipg; 1393 if (isclr(inosused, ipref)) { 1394 if (ufs_ihashcheck(ump, ip->i_dev, ibase + ipref) == 0) 1395 goto gotit; 1396 } 1397 } 1398 1399 /* 1400 * Scan the inode bitmap starting at irotor, be sure to handle 1401 * the edge case by going back to the beginning of the array. 1402 * 1403 * If the number of inodes is not byte-aligned, the unused bits 1404 * should be set to 1. This will be sanity checked in gotit. Note 1405 * that we have to be sure not to overlap the beginning and end 1406 * when irotor is in the middle of a byte as this will cause the 1407 * same bitmap byte to be checked twice. To solve this problem we 1408 * just convert everything to a byte index for the loop. 1409 */ 1410 ipref = (cgp->cg_irotor % fs->fs_ipg) >> 3; /* byte index */ 1411 len = (fs->fs_ipg + 7) >> 3; /* byte size */ 1412 arraysize = len; 1413 1414 while (len > 0) { 1415 map = inosused[ipref]; 1416 if (map != 255) { 1417 for (i = 0; i < NBBY; ++i) { 1418 /* 1419 * If we find a free bit we have to make sure 1420 * that the inode is not in the middle of 1421 * being destroyed. The inode should not exist 1422 * in the inode hash. 1423 * 1424 * Adjust the rotor to try to hit the 1425 * quick-check up above. 1426 */ 1427 if ((map & (1 << i)) == 0) { 1428 if (ufs_ihashcheck(ump, ip->i_dev, ibase + (ipref << 3) + i) == 0) { 1429 ipref = (ipref << 3) + i; 1430 cgp->cg_irotor = (ipref + 1) % fs->fs_ipg; 1431 goto gotit; 1432 } 1433 ++icheckmiss; 1434 } 1435 } 1436 } 1437 1438 /* 1439 * Setup for the next byte, start at the beginning again if 1440 * we hit the end of the array. 1441 */ 1442 if (++ipref == arraysize) 1443 ipref = 0; 1444 --len; 1445 } 1446 if (icheckmiss == cgp->cg_cs.cs_nifree) { 1447 brelse(bp); 1448 return(0); 1449 } 1450 kprintf("fs = %s\n", fs->fs_fsmnt); 1451 panic("ffs_nodealloccg: block not in map, icheckmiss/nfree %d/%d", 1452 icheckmiss, cgp->cg_cs.cs_nifree); 1453 /* NOTREACHED */ 1454 1455 /* 1456 * ipref is a bit index as of the gotit label. 1457 */ 1458 gotit: 1459 KKASSERT(ipref >= 0 && ipref < fs->fs_ipg); 1460 cgp->cg_time = time_second; 1461 if (DOINGSOFTDEP(ITOV(ip))) 1462 softdep_setup_inomapdep(bp, ip, ibase + ipref); 1463 setbit(inosused, ipref); 1464 cgp->cg_cs.cs_nifree--; 1465 fs->fs_cstotal.cs_nifree--; 1466 fs->fs_cs(fs, cg).cs_nifree--; 1467 fs->fs_fmod = 1; 1468 if ((mode & IFMT) == IFDIR) { 1469 cgp->cg_cs.cs_ndir++; 1470 fs->fs_cstotal.cs_ndir++; 1471 fs->fs_cs(fs, cg).cs_ndir++; 1472 } 1473 bdwrite(bp); 1474 return (ibase + ipref); 1475 } 1476 1477 /* 1478 * Free a block or fragment. 1479 * 1480 * The specified block or fragment is placed back in the 1481 * free map. If a fragment is deallocated, a possible 1482 * block reassembly is checked. 1483 */ 1484 void 1485 ffs_blkfree_cg(struct fs * fs, struct vnode * i_devvp, cdev_t i_dev, ino_t i_number, 1486 uint32_t i_din_uid, ufs_daddr_t bno, long size) 1487 { 1488 struct cg *cgp; 1489 struct buf *bp; 1490 ufs_daddr_t blkno; 1491 int i, error, cg, blk, frags, bbase; 1492 uint8_t *blksfree; 1493 1494 #if 0 1495 /* 1496 * ffs_blkfree() handles TRIM if UFS is mounted with the 'trim' 1497 * option, do not issue an unconditional duplicate here! 1498 * VOP_FREEBLKS(i_devvp, fsbtodoff(fs, bno), size); 1499 */ 1500 #endif 1501 if ((uint)size > fs->fs_bsize || fragoff(fs, size) != 0 || 1502 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 1503 kprintf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n", 1504 devtoname(i_dev), (long)bno, (long)fs->fs_bsize, size, 1505 fs->fs_fsmnt); 1506 panic("ffs_blkfree: bad size"); 1507 } 1508 cg = dtog(fs, bno); 1509 if ((uint)bno >= fs->fs_size) { 1510 kprintf("bad block %ld, ino %lu\n", 1511 (long)bno, (u_long)i_number); 1512 ffs_fserr(fs, i_din_uid, "bad block"); 1513 return; 1514 } 1515 1516 /* 1517 * Load the cylinder group 1518 */ 1519 error = bread(i_devvp, fsbtodoff(fs, cgtod(fs, cg)), 1520 (int)fs->fs_cgsize, &bp); 1521 if (error) { 1522 brelse(bp); 1523 return; 1524 } 1525 cgp = (struct cg *)bp->b_data; 1526 if (!cg_chkmagic(cgp)) { 1527 brelse(bp); 1528 return; 1529 } 1530 cgp->cg_time = time_second; 1531 bno = dtogd(fs, bno); 1532 blksfree = cg_blksfree(cgp); 1533 1534 if (size == fs->fs_bsize) { 1535 /* 1536 * Free a whole block 1537 */ 1538 blkno = fragstoblks(fs, bno); 1539 if (!ffs_isfreeblock(fs, blksfree, blkno)) { 1540 kprintf("dev = %s, block = %ld, fs = %s\n", 1541 devtoname(i_dev), (long)bno, fs->fs_fsmnt); 1542 panic("ffs_blkfree: freeing free block"); 1543 } 1544 ffs_setblock(fs, blksfree, blkno); 1545 ffs_clusteracct(fs, cgp, blkno, 1); 1546 cgp->cg_cs.cs_nbfree++; 1547 fs->fs_cstotal.cs_nbfree++; 1548 fs->fs_cs(fs, cg).cs_nbfree++; 1549 i = cbtocylno(fs, bno); 1550 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; 1551 cg_blktot(cgp)[i]++; 1552 } else { 1553 /* 1554 * Free a fragment within a block. 1555 * 1556 * bno is the starting block number of the fragment being 1557 * freed. 1558 * 1559 * bbase is the starting block number for the filesystem 1560 * block containing the fragment. 1561 * 1562 * blk is the current bitmap for the fragments within the 1563 * filesystem block containing the fragment. 1564 * 1565 * frags is the number of fragments being freed 1566 * 1567 * Call ffs_fragacct() to account for the removal of all 1568 * current fragments, then adjust the bitmap to free the 1569 * requested fragment, and finally call ffs_fragacct() again 1570 * to regenerate the accounting. 1571 */ 1572 bbase = bno - fragnum(fs, bno); 1573 blk = blkmap(fs, blksfree, bbase); 1574 ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 1575 frags = numfrags(fs, size); 1576 for (i = 0; i < frags; i++) { 1577 if (isset(blksfree, bno + i)) { 1578 kprintf("dev = %s, block = %ld, fs = %s\n", 1579 devtoname(i_dev), (long)(bno + i), 1580 fs->fs_fsmnt); 1581 panic("ffs_blkfree: freeing free frag"); 1582 } 1583 setbit(blksfree, bno + i); 1584 } 1585 cgp->cg_cs.cs_nffree += i; 1586 fs->fs_cstotal.cs_nffree += i; 1587 fs->fs_cs(fs, cg).cs_nffree += i; 1588 1589 /* 1590 * Add back in counts associated with the new frags 1591 */ 1592 blk = blkmap(fs, blksfree, bbase); 1593 ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1594 1595 /* 1596 * If a complete block has been reassembled, account for it 1597 */ 1598 blkno = fragstoblks(fs, bbase); 1599 if (ffs_isblock(fs, blksfree, blkno)) { 1600 cgp->cg_cs.cs_nffree -= fs->fs_frag; 1601 fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1602 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 1603 ffs_clusteracct(fs, cgp, blkno, 1); 1604 cgp->cg_cs.cs_nbfree++; 1605 fs->fs_cstotal.cs_nbfree++; 1606 fs->fs_cs(fs, cg).cs_nbfree++; 1607 i = cbtocylno(fs, bbase); 1608 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; 1609 cg_blktot(cgp)[i]++; 1610 } 1611 } 1612 fs->fs_fmod = 1; 1613 bdwrite(bp); 1614 } 1615 1616 struct ffs_blkfree_trim_params { 1617 struct task task; 1618 ufs_daddr_t bno; 1619 long size; 1620 1621 /* 1622 * With TRIM, inode pointer is gone in the callback but we still need 1623 * the following fields for ffs_blkfree_cg() 1624 */ 1625 struct vnode *i_devvp; 1626 struct fs *i_fs; 1627 cdev_t i_dev; 1628 ino_t i_number; 1629 uint32_t i_din_uid; 1630 }; 1631 1632 1633 static void 1634 ffs_blkfree_trim_task(void *ctx, int pending) 1635 { 1636 struct ffs_blkfree_trim_params *tp; 1637 1638 tp = ctx; 1639 ffs_blkfree_cg(tp->i_fs, tp->i_devvp, tp->i_dev, tp->i_number, 1640 tp->i_din_uid, tp->bno, tp->size); 1641 kfree(tp, M_TEMP); 1642 } 1643 1644 1645 1646 static void 1647 ffs_blkfree_trim_completed(struct bio *biop) 1648 { 1649 struct buf *bp = biop->bio_buf; 1650 struct ffs_blkfree_trim_params *tp; 1651 1652 tp = bp->b_bio1.bio_caller_info1.ptr; 1653 TASK_INIT(&tp->task, 0, ffs_blkfree_trim_task, tp); 1654 tp = biop->bio_caller_info1.ptr; 1655 taskqueue_enqueue(taskqueue_swi, &tp->task); 1656 biodone(biop); 1657 } 1658 1659 1660 /* 1661 * If TRIM is enabled, we TRIM the blocks first then free them. We do this 1662 * after TRIM is finished and the callback handler is called. The logic here 1663 * is that we free the blocks before updating the bitmap so that we don't 1664 * reuse a block before we actually trim it, which would result in trimming 1665 * a valid block. 1666 */ 1667 void 1668 ffs_blkfree(struct inode *ip, ufs_daddr_t bno, long size) 1669 { 1670 struct mount *mp = ip->i_devvp->v_mount; 1671 struct ffs_blkfree_trim_params *tp; 1672 1673 if (!(mp->mnt_flag & MNT_TRIM)) { 1674 ffs_blkfree_cg(ip->i_fs, ip->i_devvp,ip->i_dev,ip->i_number, 1675 ip->i_uid, bno, size); 1676 return; 1677 } 1678 1679 struct buf *bp; 1680 1681 tp = kmalloc(sizeof(struct ffs_blkfree_trim_params), M_TEMP, M_WAITOK); 1682 tp->bno = bno; 1683 tp->i_fs= ip->i_fs; 1684 tp->i_devvp = ip->i_devvp; 1685 tp->i_dev = ip->i_dev; 1686 tp->i_din_uid = ip->i_uid; 1687 tp->i_number = ip->i_number; 1688 tp->size = size; 1689 1690 bp = getnewbuf(0, 0, 0, 1); 1691 BUF_KERNPROC(bp); 1692 bp->b_cmd = BUF_CMD_FREEBLKS; 1693 bp->b_bio1.bio_offset = fsbtodoff(ip->i_fs, bno); 1694 bp->b_bcount = size; 1695 bp->b_bio1.bio_caller_info1.ptr = tp; 1696 bp->b_bio1.bio_done = ffs_blkfree_trim_completed; 1697 vn_strategy(ip->i_devvp, &bp->b_bio1); 1698 } 1699 1700 #ifdef DIAGNOSTIC 1701 /* 1702 * Verify allocation of a block or fragment. Returns true if block or 1703 * fragment is allocated, false if it is free. 1704 */ 1705 static int 1706 ffs_checkblk(struct inode *ip, ufs_daddr_t bno, long size) 1707 { 1708 struct fs *fs; 1709 struct cg *cgp; 1710 struct buf *bp; 1711 int i, error, frags, free; 1712 uint8_t *blksfree; 1713 1714 fs = ip->i_fs; 1715 if ((uint)size > fs->fs_bsize || fragoff(fs, size) != 0) { 1716 kprintf("bsize = %ld, size = %ld, fs = %s\n", 1717 (long)fs->fs_bsize, size, fs->fs_fsmnt); 1718 panic("ffs_checkblk: bad size"); 1719 } 1720 if ((uint)bno >= fs->fs_size) 1721 panic("ffs_checkblk: bad block %d", bno); 1722 error = bread(ip->i_devvp, fsbtodoff(fs, cgtod(fs, dtog(fs, bno))), 1723 (int)fs->fs_cgsize, &bp); 1724 if (error) 1725 panic("ffs_checkblk: cg bread failed"); 1726 cgp = (struct cg *)bp->b_data; 1727 if (!cg_chkmagic(cgp)) 1728 panic("ffs_checkblk: cg magic mismatch"); 1729 blksfree = cg_blksfree(cgp); 1730 bno = dtogd(fs, bno); 1731 if (size == fs->fs_bsize) { 1732 free = ffs_isblock(fs, blksfree, fragstoblks(fs, bno)); 1733 } else { 1734 frags = numfrags(fs, size); 1735 for (free = 0, i = 0; i < frags; i++) 1736 if (isset(blksfree, bno + i)) 1737 free++; 1738 if (free != 0 && free != frags) 1739 panic("ffs_checkblk: partially free fragment"); 1740 } 1741 brelse(bp); 1742 return (!free); 1743 } 1744 #endif /* DIAGNOSTIC */ 1745 1746 /* 1747 * Free an inode. 1748 */ 1749 int 1750 ffs_vfree(struct vnode *pvp, ino_t ino, int mode) 1751 { 1752 if (DOINGSOFTDEP(pvp)) { 1753 softdep_freefile(pvp, ino, mode); 1754 return (0); 1755 } 1756 return (ffs_freefile(pvp, ino, mode)); 1757 } 1758 1759 /* 1760 * Do the actual free operation. 1761 * The specified inode is placed back in the free map. 1762 */ 1763 int 1764 ffs_freefile(struct vnode *pvp, ino_t ino, int mode) 1765 { 1766 struct fs *fs; 1767 struct cg *cgp; 1768 struct inode *pip; 1769 struct buf *bp; 1770 int error, cg; 1771 uint8_t *inosused; 1772 1773 pip = VTOI(pvp); 1774 fs = pip->i_fs; 1775 if ((uint)ino >= fs->fs_ipg * fs->fs_ncg) 1776 panic("ffs_vfree: range: dev = (%d,%d), ino = %"PRId64", fs = %s", 1777 major(pip->i_dev), minor(pip->i_dev), ino, fs->fs_fsmnt); 1778 cg = ino_to_cg(fs, ino); 1779 error = bread(pip->i_devvp, fsbtodoff(fs, cgtod(fs, cg)), 1780 (int)fs->fs_cgsize, &bp); 1781 if (error) { 1782 brelse(bp); 1783 return (error); 1784 } 1785 cgp = (struct cg *)bp->b_data; 1786 if (!cg_chkmagic(cgp)) { 1787 brelse(bp); 1788 return (0); 1789 } 1790 cgp->cg_time = time_second; 1791 inosused = cg_inosused(cgp); 1792 ino %= fs->fs_ipg; 1793 if (isclr(inosused, ino)) { 1794 kprintf("dev = %s, ino = %lu, fs = %s\n", 1795 devtoname(pip->i_dev), (u_long)ino, fs->fs_fsmnt); 1796 if (fs->fs_ronly == 0) 1797 panic("ffs_vfree: freeing free inode"); 1798 } 1799 clrbit(inosused, ino); 1800 if (ino < cgp->cg_irotor) 1801 cgp->cg_irotor = ino; 1802 cgp->cg_cs.cs_nifree++; 1803 fs->fs_cstotal.cs_nifree++; 1804 fs->fs_cs(fs, cg).cs_nifree++; 1805 if ((mode & IFMT) == IFDIR) { 1806 cgp->cg_cs.cs_ndir--; 1807 fs->fs_cstotal.cs_ndir--; 1808 fs->fs_cs(fs, cg).cs_ndir--; 1809 } 1810 fs->fs_fmod = 1; 1811 bdwrite(bp); 1812 return (0); 1813 } 1814 1815 /* 1816 * Find a block of the specified size in the specified cylinder group. 1817 * 1818 * It is a panic if a request is made to find a block if none are 1819 * available. 1820 */ 1821 static ufs_daddr_t 1822 ffs_mapsearch(struct fs *fs, struct cg *cgp, ufs_daddr_t bpref, int allocsiz) 1823 { 1824 ufs_daddr_t bno; 1825 int start, len, loc, i; 1826 int blk, field, subfield, pos; 1827 uint8_t *blksfree; 1828 1829 /* 1830 * find the fragment by searching through the free block 1831 * map for an appropriate bit pattern. 1832 */ 1833 if (bpref) 1834 start = dtogd(fs, bpref) / NBBY; 1835 else 1836 start = cgp->cg_frotor / NBBY; 1837 blksfree = cg_blksfree(cgp); 1838 len = howmany(fs->fs_fpg, NBBY) - start; 1839 loc = scanc((uint)len, (u_char *)&blksfree[start], 1840 (u_char *)fragtbl[fs->fs_frag], 1841 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 1842 if (loc == 0) { 1843 len = start + 1; /* XXX why overlap here? */ 1844 start = 0; 1845 loc = scanc((uint)len, (u_char *)&blksfree[0], 1846 (u_char *)fragtbl[fs->fs_frag], 1847 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 1848 if (loc == 0) { 1849 kprintf("start = %d, len = %d, fs = %s\n", 1850 start, len, fs->fs_fsmnt); 1851 panic("ffs_alloccg: map corrupted"); 1852 /* NOTREACHED */ 1853 } 1854 } 1855 bno = (start + len - loc) * NBBY; 1856 cgp->cg_frotor = bno; 1857 /* 1858 * found the byte in the map 1859 * sift through the bits to find the selected frag 1860 */ 1861 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 1862 blk = blkmap(fs, blksfree, bno); 1863 blk <<= 1; 1864 field = around[allocsiz]; 1865 subfield = inside[allocsiz]; 1866 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 1867 if ((blk & field) == subfield) 1868 return (bno + pos); 1869 field <<= 1; 1870 subfield <<= 1; 1871 } 1872 } 1873 kprintf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); 1874 panic("ffs_alloccg: block not in map"); 1875 return (-1); 1876 } 1877 1878 /* 1879 * Update the cluster map because of an allocation or free. 1880 * 1881 * Cnt == 1 means free; cnt == -1 means allocating. 1882 */ 1883 static void 1884 ffs_clusteracct(struct fs *fs, struct cg *cgp, ufs_daddr_t blkno, int cnt) 1885 { 1886 int32_t *sump; 1887 int32_t *lp; 1888 u_char *freemapp, *mapp; 1889 int i, start, end, forw, back, map, bit; 1890 1891 if (fs->fs_contigsumsize <= 0) 1892 return; 1893 freemapp = cg_clustersfree(cgp); 1894 sump = cg_clustersum(cgp); 1895 /* 1896 * Allocate or clear the actual block. 1897 */ 1898 if (cnt > 0) 1899 setbit(freemapp, blkno); 1900 else 1901 clrbit(freemapp, blkno); 1902 /* 1903 * Find the size of the cluster going forward. 1904 */ 1905 start = blkno + 1; 1906 end = start + fs->fs_contigsumsize; 1907 if (end >= cgp->cg_nclusterblks) 1908 end = cgp->cg_nclusterblks; 1909 mapp = &freemapp[start / NBBY]; 1910 map = *mapp++; 1911 bit = 1 << (start % NBBY); 1912 for (i = start; i < end; i++) { 1913 if ((map & bit) == 0) 1914 break; 1915 if ((i & (NBBY - 1)) != (NBBY - 1)) { 1916 bit <<= 1; 1917 } else { 1918 map = *mapp++; 1919 bit = 1; 1920 } 1921 } 1922 forw = i - start; 1923 /* 1924 * Find the size of the cluster going backward. 1925 */ 1926 start = blkno - 1; 1927 end = start - fs->fs_contigsumsize; 1928 if (end < 0) 1929 end = -1; 1930 mapp = &freemapp[start / NBBY]; 1931 map = *mapp--; 1932 bit = 1 << (start % NBBY); 1933 for (i = start; i > end; i--) { 1934 if ((map & bit) == 0) 1935 break; 1936 if ((i & (NBBY - 1)) != 0) { 1937 bit >>= 1; 1938 } else { 1939 map = *mapp--; 1940 bit = 1 << (NBBY - 1); 1941 } 1942 } 1943 back = start - i; 1944 /* 1945 * Account for old cluster and the possibly new forward and 1946 * back clusters. 1947 */ 1948 i = back + forw + 1; 1949 if (i > fs->fs_contigsumsize) 1950 i = fs->fs_contigsumsize; 1951 sump[i] += cnt; 1952 if (back > 0) 1953 sump[back] -= cnt; 1954 if (forw > 0) 1955 sump[forw] -= cnt; 1956 /* 1957 * Update cluster summary information. 1958 */ 1959 lp = &sump[fs->fs_contigsumsize]; 1960 for (i = fs->fs_contigsumsize; i > 0; i--) 1961 if (*lp-- > 0) 1962 break; 1963 fs->fs_maxcluster[cgp->cg_cgx] = i; 1964 } 1965 1966 /* 1967 * Fserr prints the name of a filesystem with an error diagnostic. 1968 * 1969 * The form of the error message is: 1970 * fs: error message 1971 */ 1972 static void 1973 ffs_fserr(struct fs *fs, uint uid, char *cp) 1974 { 1975 struct thread *td = curthread; 1976 struct proc *p; 1977 1978 if ((p = td->td_proc) != NULL) { 1979 log(LOG_ERR, "pid %d (%s), uid %d on %s: %s\n", p ? p->p_pid : -1, 1980 p ? p->p_comm : "-", uid, fs->fs_fsmnt, cp); 1981 } else { 1982 log(LOG_ERR, "system thread %p, uid %d on %s: %s\n", 1983 td, uid, fs->fs_fsmnt, cp); 1984 } 1985 } 1986