1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 34 * $FreeBSD: src/sys/ufs/ffs/ffs_alloc.c,v 1.64.2.2 2001/09/21 19:15:21 dillon Exp $ 35 * $DragonFly: src/sys/vfs/ufs/ffs_alloc.c,v 1.7 2003/08/20 09:56:34 rob Exp $ 36 */ 37 38 #include "opt_quota.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/buf.h> 43 #include <sys/conf.h> 44 #include <sys/proc.h> 45 #include <sys/vnode.h> 46 #include <sys/mount.h> 47 #include <sys/kernel.h> 48 #include <sys/sysctl.h> 49 #include <sys/syslog.h> 50 51 #include "quota.h" 52 #include "inode.h" 53 #include "ufs_extern.h" 54 #include "ufsmount.h" 55 56 #include "fs.h" 57 #include "ffs_extern.h" 58 59 typedef ufs_daddr_t allocfcn_t (struct inode *ip, int cg, ufs_daddr_t bpref, 60 int size); 61 62 static ufs_daddr_t ffs_alloccg (struct inode *, int, ufs_daddr_t, int); 63 static ufs_daddr_t 64 ffs_alloccgblk (struct inode *, struct buf *, ufs_daddr_t); 65 #ifdef DIAGNOSTIC 66 static int ffs_checkblk (struct inode *, ufs_daddr_t, long); 67 #endif 68 static void ffs_clusteracct (struct fs *, struct cg *, ufs_daddr_t, 69 int); 70 static ufs_daddr_t ffs_clusteralloc (struct inode *, int, ufs_daddr_t, 71 int); 72 static ino_t ffs_dirpref (struct inode *); 73 static ufs_daddr_t ffs_fragextend (struct inode *, int, long, int, int); 74 static void ffs_fserr (struct fs *, u_int, char *); 75 static u_long ffs_hashalloc 76 (struct inode *, int, long, int, allocfcn_t *); 77 static ino_t ffs_nodealloccg (struct inode *, int, ufs_daddr_t, int); 78 static ufs_daddr_t ffs_mapsearch (struct fs *, struct cg *, ufs_daddr_t, 79 int); 80 81 /* 82 * Allocate a block in the file system. 83 * 84 * The size of the requested block is given, which must be some 85 * multiple of fs_fsize and <= fs_bsize. 86 * A preference may be optionally specified. If a preference is given 87 * the following hierarchy is used to allocate a block: 88 * 1) allocate the requested block. 89 * 2) allocate a rotationally optimal block in the same cylinder. 90 * 3) allocate a block in the same cylinder group. 91 * 4) quadradically rehash into other cylinder groups, until an 92 * available block is located. 93 * If no block preference is given the following heirarchy is used 94 * to allocate a block: 95 * 1) allocate a block in the cylinder group that contains the 96 * inode for the file. 97 * 2) quadradically rehash into other cylinder groups, until an 98 * available block is located. 99 */ 100 int 101 ffs_alloc(ip, lbn, bpref, size, cred, bnp) 102 struct inode *ip; 103 ufs_daddr_t lbn, bpref; 104 int size; 105 struct ucred *cred; 106 ufs_daddr_t *bnp; 107 { 108 struct fs *fs; 109 ufs_daddr_t bno; 110 int cg; 111 #ifdef QUOTA 112 int error; 113 #endif 114 115 *bnp = 0; 116 fs = ip->i_fs; 117 #ifdef DIAGNOSTIC 118 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 119 printf("dev = %s, bsize = %ld, size = %d, fs = %s\n", 120 devtoname(ip->i_dev), (long)fs->fs_bsize, size, 121 fs->fs_fsmnt); 122 panic("ffs_alloc: bad size"); 123 } 124 if (cred == NOCRED) 125 panic("ffs_alloc: missing credential"); 126 #endif /* DIAGNOSTIC */ 127 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) 128 goto nospace; 129 if (cred->cr_uid != 0 && 130 freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) 131 goto nospace; 132 #ifdef QUOTA 133 error = chkdq(ip, (long)btodb(size), cred, 0); 134 if (error) 135 return (error); 136 #endif 137 if (bpref >= fs->fs_size) 138 bpref = 0; 139 if (bpref == 0) 140 cg = ino_to_cg(fs, ip->i_number); 141 else 142 cg = dtog(fs, bpref); 143 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, 144 ffs_alloccg); 145 if (bno > 0) { 146 ip->i_blocks += btodb(size); 147 ip->i_flag |= IN_CHANGE | IN_UPDATE; 148 *bnp = bno; 149 return (0); 150 } 151 #ifdef QUOTA 152 /* 153 * Restore user's disk quota because allocation failed. 154 */ 155 (void) chkdq(ip, (long)-btodb(size), cred, FORCE); 156 #endif 157 nospace: 158 ffs_fserr(fs, cred->cr_uid, "file system full"); 159 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 160 return (ENOSPC); 161 } 162 163 /* 164 * Reallocate a fragment to a bigger size 165 * 166 * The number and size of the old block is given, and a preference 167 * and new size is also specified. The allocator attempts to extend 168 * the original block. Failing that, the regular block allocator is 169 * invoked to get an appropriate block. 170 */ 171 int 172 ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) 173 struct inode *ip; 174 ufs_daddr_t lbprev; 175 ufs_daddr_t bpref; 176 int osize, nsize; 177 struct ucred *cred; 178 struct buf **bpp; 179 { 180 struct fs *fs; 181 struct buf *bp; 182 int cg, request, error; 183 ufs_daddr_t bprev, bno; 184 185 *bpp = 0; 186 fs = ip->i_fs; 187 #ifdef DIAGNOSTIC 188 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || 189 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { 190 printf( 191 "dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n", 192 devtoname(ip->i_dev), (long)fs->fs_bsize, osize, 193 nsize, fs->fs_fsmnt); 194 panic("ffs_realloccg: bad size"); 195 } 196 if (cred == NOCRED) 197 panic("ffs_realloccg: missing credential"); 198 #endif /* DIAGNOSTIC */ 199 if (cred->cr_uid != 0 && 200 freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) 201 goto nospace; 202 if ((bprev = ip->i_db[lbprev]) == 0) { 203 printf("dev = %s, bsize = %ld, bprev = %ld, fs = %s\n", 204 devtoname(ip->i_dev), (long)fs->fs_bsize, (long)bprev, 205 fs->fs_fsmnt); 206 panic("ffs_realloccg: bad bprev"); 207 } 208 /* 209 * Allocate the extra space in the buffer. 210 */ 211 error = bread(ITOV(ip), lbprev, osize, &bp); 212 if (error) { 213 brelse(bp); 214 return (error); 215 } 216 217 if( bp->b_blkno == bp->b_lblkno) { 218 if( lbprev >= NDADDR) 219 panic("ffs_realloccg: lbprev out of range"); 220 bp->b_blkno = fsbtodb(fs, bprev); 221 } 222 223 #ifdef QUOTA 224 error = chkdq(ip, (long)btodb(nsize - osize), cred, 0); 225 if (error) { 226 brelse(bp); 227 return (error); 228 } 229 #endif 230 /* 231 * Check for extension in the existing location. 232 */ 233 cg = dtog(fs, bprev); 234 bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize); 235 if (bno) { 236 if (bp->b_blkno != fsbtodb(fs, bno)) 237 panic("ffs_realloccg: bad blockno"); 238 ip->i_blocks += btodb(nsize - osize); 239 ip->i_flag |= IN_CHANGE | IN_UPDATE; 240 allocbuf(bp, nsize); 241 bp->b_flags |= B_DONE; 242 bzero((char *)bp->b_data + osize, (u_int)nsize - osize); 243 *bpp = bp; 244 return (0); 245 } 246 /* 247 * Allocate a new disk location. 248 */ 249 if (bpref >= fs->fs_size) 250 bpref = 0; 251 switch ((int)fs->fs_optim) { 252 case FS_OPTSPACE: 253 /* 254 * Allocate an exact sized fragment. Although this makes 255 * best use of space, we will waste time relocating it if 256 * the file continues to grow. If the fragmentation is 257 * less than half of the minimum free reserve, we choose 258 * to begin optimizing for time. 259 */ 260 request = nsize; 261 if (fs->fs_minfree <= 5 || 262 fs->fs_cstotal.cs_nffree > 263 (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100)) 264 break; 265 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", 266 fs->fs_fsmnt); 267 fs->fs_optim = FS_OPTTIME; 268 break; 269 case FS_OPTTIME: 270 /* 271 * At this point we have discovered a file that is trying to 272 * grow a small fragment to a larger fragment. To save time, 273 * we allocate a full sized block, then free the unused portion. 274 * If the file continues to grow, the `ffs_fragextend' call 275 * above will be able to grow it in place without further 276 * copying. If aberrant programs cause disk fragmentation to 277 * grow within 2% of the free reserve, we choose to begin 278 * optimizing for space. 279 */ 280 request = fs->fs_bsize; 281 if (fs->fs_cstotal.cs_nffree < 282 (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100) 283 break; 284 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", 285 fs->fs_fsmnt); 286 fs->fs_optim = FS_OPTSPACE; 287 break; 288 default: 289 printf("dev = %s, optim = %ld, fs = %s\n", 290 devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt); 291 panic("ffs_realloccg: bad optim"); 292 /* NOTREACHED */ 293 } 294 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, 295 ffs_alloccg); 296 if (bno > 0) { 297 bp->b_blkno = fsbtodb(fs, bno); 298 if (!DOINGSOFTDEP(ITOV(ip))) 299 ffs_blkfree(ip, bprev, (long)osize); 300 if (nsize < request) 301 ffs_blkfree(ip, bno + numfrags(fs, nsize), 302 (long)(request - nsize)); 303 ip->i_blocks += btodb(nsize - osize); 304 ip->i_flag |= IN_CHANGE | IN_UPDATE; 305 allocbuf(bp, nsize); 306 bp->b_flags |= B_DONE; 307 bzero((char *)bp->b_data + osize, (u_int)nsize - osize); 308 *bpp = bp; 309 return (0); 310 } 311 #ifdef QUOTA 312 /* 313 * Restore user's disk quota because allocation failed. 314 */ 315 (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); 316 #endif 317 brelse(bp); 318 nospace: 319 /* 320 * no space available 321 */ 322 ffs_fserr(fs, cred->cr_uid, "file system full"); 323 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); 324 return (ENOSPC); 325 } 326 327 SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); 328 329 /* 330 * Reallocate a sequence of blocks into a contiguous sequence of blocks. 331 * 332 * The vnode and an array of buffer pointers for a range of sequential 333 * logical blocks to be made contiguous is given. The allocator attempts 334 * to find a range of sequential blocks starting as close as possible to 335 * an fs_rotdelay offset from the end of the allocation for the logical 336 * block immediately preceeding the current range. If successful, the 337 * physical block numbers in the buffer pointers and in the inode are 338 * changed to reflect the new allocation. If unsuccessful, the allocation 339 * is left unchanged. The success in doing the reallocation is returned. 340 * Note that the error return is not reflected back to the user. Rather 341 * the previous block allocation will be used. 342 */ 343 static int doasyncfree = 1; 344 SYSCTL_INT(_vfs_ffs, FFS_ASYNCFREE, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); 345 346 static int doreallocblks = 1; 347 SYSCTL_INT(_vfs_ffs, FFS_REALLOCBLKS, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); 348 349 #ifdef DEBUG 350 static volatile int prtrealloc = 0; 351 #endif 352 353 int 354 ffs_reallocblks(ap) 355 struct vop_reallocblks_args /* { 356 struct vnode *a_vp; 357 struct cluster_save *a_buflist; 358 } */ *ap; 359 { 360 struct fs *fs; 361 struct inode *ip; 362 struct vnode *vp; 363 struct buf *sbp, *ebp; 364 ufs_daddr_t *bap, *sbap, *ebap = 0; 365 struct cluster_save *buflist; 366 ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno; 367 struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; 368 int i, len, start_lvl, end_lvl, pref, ssize; 369 370 if (doreallocblks == 0) 371 return (ENOSPC); 372 vp = ap->a_vp; 373 ip = VTOI(vp); 374 fs = ip->i_fs; 375 if (fs->fs_contigsumsize <= 0) 376 return (ENOSPC); 377 buflist = ap->a_buflist; 378 len = buflist->bs_nchildren; 379 start_lbn = buflist->bs_children[0]->b_lblkno; 380 end_lbn = start_lbn + len - 1; 381 #ifdef DIAGNOSTIC 382 for (i = 0; i < len; i++) 383 if (!ffs_checkblk(ip, 384 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 385 panic("ffs_reallocblks: unallocated block 1"); 386 for (i = 1; i < len; i++) 387 if (buflist->bs_children[i]->b_lblkno != start_lbn + i) 388 panic("ffs_reallocblks: non-logical cluster"); 389 blkno = buflist->bs_children[0]->b_blkno; 390 ssize = fsbtodb(fs, fs->fs_frag); 391 for (i = 1; i < len - 1; i++) 392 if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) 393 panic("ffs_reallocblks: non-physical cluster %d", i); 394 #endif 395 /* 396 * If the latest allocation is in a new cylinder group, assume that 397 * the filesystem has decided to move and do not force it back to 398 * the previous cylinder group. 399 */ 400 if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != 401 dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) 402 return (ENOSPC); 403 if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || 404 ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) 405 return (ENOSPC); 406 /* 407 * Get the starting offset and block map for the first block. 408 */ 409 if (start_lvl == 0) { 410 sbap = &ip->i_db[0]; 411 soff = start_lbn; 412 } else { 413 idp = &start_ap[start_lvl - 1]; 414 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, &sbp)) { 415 brelse(sbp); 416 return (ENOSPC); 417 } 418 sbap = (ufs_daddr_t *)sbp->b_data; 419 soff = idp->in_off; 420 } 421 /* 422 * Find the preferred location for the cluster. 423 */ 424 pref = ffs_blkpref(ip, start_lbn, soff, sbap); 425 /* 426 * If the block range spans two block maps, get the second map. 427 */ 428 if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { 429 ssize = len; 430 } else { 431 #ifdef DIAGNOSTIC 432 if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) 433 panic("ffs_reallocblk: start == end"); 434 #endif 435 ssize = len - (idp->in_off + 1); 436 if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, &ebp)) 437 goto fail; 438 ebap = (ufs_daddr_t *)ebp->b_data; 439 } 440 /* 441 * Search the block map looking for an allocation of the desired size. 442 */ 443 if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, 444 len, ffs_clusteralloc)) == 0) 445 goto fail; 446 /* 447 * We have found a new contiguous block. 448 * 449 * First we have to replace the old block pointers with the new 450 * block pointers in the inode and indirect blocks associated 451 * with the file. 452 */ 453 #ifdef DEBUG 454 if (prtrealloc) 455 printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number, 456 start_lbn, end_lbn); 457 #endif 458 blkno = newblk; 459 for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { 460 if (i == ssize) { 461 bap = ebap; 462 soff = -i; 463 } 464 #ifdef DIAGNOSTIC 465 if (!ffs_checkblk(ip, 466 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 467 panic("ffs_reallocblks: unallocated block 2"); 468 if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) 469 panic("ffs_reallocblks: alloc mismatch"); 470 #endif 471 #ifdef DEBUG 472 if (prtrealloc) 473 printf(" %d,", *bap); 474 #endif 475 if (DOINGSOFTDEP(vp)) { 476 if (sbap == &ip->i_db[0] && i < ssize) 477 softdep_setup_allocdirect(ip, start_lbn + i, 478 blkno, *bap, fs->fs_bsize, fs->fs_bsize, 479 buflist->bs_children[i]); 480 else 481 softdep_setup_allocindir_page(ip, start_lbn + i, 482 i < ssize ? sbp : ebp, soff + i, blkno, 483 *bap, buflist->bs_children[i]); 484 } 485 *bap++ = blkno; 486 } 487 /* 488 * Next we must write out the modified inode and indirect blocks. 489 * For strict correctness, the writes should be synchronous since 490 * the old block values may have been written to disk. In practise 491 * they are almost never written, but if we are concerned about 492 * strict correctness, the `doasyncfree' flag should be set to zero. 493 * 494 * The test on `doasyncfree' should be changed to test a flag 495 * that shows whether the associated buffers and inodes have 496 * been written. The flag should be set when the cluster is 497 * started and cleared whenever the buffer or inode is flushed. 498 * We can then check below to see if it is set, and do the 499 * synchronous write only when it has been cleared. 500 */ 501 if (sbap != &ip->i_db[0]) { 502 if (doasyncfree) 503 bdwrite(sbp); 504 else 505 bwrite(sbp); 506 } else { 507 ip->i_flag |= IN_CHANGE | IN_UPDATE; 508 if (!doasyncfree) 509 UFS_UPDATE(vp, 1); 510 } 511 if (ssize < len) { 512 if (doasyncfree) 513 bdwrite(ebp); 514 else 515 bwrite(ebp); 516 } 517 /* 518 * Last, free the old blocks and assign the new blocks to the buffers. 519 */ 520 #ifdef DEBUG 521 if (prtrealloc) 522 printf("\n\tnew:"); 523 #endif 524 for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { 525 if (!DOINGSOFTDEP(vp)) 526 ffs_blkfree(ip, 527 dbtofsb(fs, buflist->bs_children[i]->b_blkno), 528 fs->fs_bsize); 529 buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); 530 #ifdef DIAGNOSTIC 531 if (!ffs_checkblk(ip, 532 dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) 533 panic("ffs_reallocblks: unallocated block 3"); 534 #endif 535 #ifdef DEBUG 536 if (prtrealloc) 537 printf(" %d,", blkno); 538 #endif 539 } 540 #ifdef DEBUG 541 if (prtrealloc) { 542 prtrealloc--; 543 printf("\n"); 544 } 545 #endif 546 return (0); 547 548 fail: 549 if (ssize < len) 550 brelse(ebp); 551 if (sbap != &ip->i_db[0]) 552 brelse(sbp); 553 return (ENOSPC); 554 } 555 556 /* 557 * Allocate an inode in the file system. 558 * 559 * If allocating a directory, use ffs_dirpref to select the inode. 560 * If allocating in a directory, the following hierarchy is followed: 561 * 1) allocate the preferred inode. 562 * 2) allocate an inode in the same cylinder group. 563 * 3) quadradically rehash into other cylinder groups, until an 564 * available inode is located. 565 * If no inode preference is given the following heirarchy is used 566 * to allocate an inode: 567 * 1) allocate an inode in cylinder group 0. 568 * 2) quadradically rehash into other cylinder groups, until an 569 * available inode is located. 570 */ 571 int 572 ffs_valloc(pvp, mode, cred, vpp) 573 struct vnode *pvp; 574 int mode; 575 struct ucred *cred; 576 struct vnode **vpp; 577 { 578 struct inode *pip; 579 struct fs *fs; 580 struct inode *ip; 581 ino_t ino, ipref; 582 int cg, error; 583 584 *vpp = NULL; 585 pip = VTOI(pvp); 586 fs = pip->i_fs; 587 if (fs->fs_cstotal.cs_nifree == 0) 588 goto noinodes; 589 590 if ((mode & IFMT) == IFDIR) 591 ipref = ffs_dirpref(pip); 592 else 593 ipref = pip->i_number; 594 if (ipref >= fs->fs_ncg * fs->fs_ipg) 595 ipref = 0; 596 cg = ino_to_cg(fs, ipref); 597 /* 598 * Track number of dirs created one after another 599 * in a same cg without intervening by files. 600 */ 601 if ((mode & IFMT) == IFDIR) { 602 if (fs->fs_contigdirs[cg] < 255) 603 fs->fs_contigdirs[cg]++; 604 } else { 605 if (fs->fs_contigdirs[cg] > 0) 606 fs->fs_contigdirs[cg]--; 607 } 608 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, 609 (allocfcn_t *)ffs_nodealloccg); 610 if (ino == 0) 611 goto noinodes; 612 error = VFS_VGET(pvp->v_mount, ino, vpp); 613 if (error) { 614 UFS_VFREE(pvp, ino, mode); 615 return (error); 616 } 617 ip = VTOI(*vpp); 618 if (ip->i_mode) { 619 printf("mode = 0%o, inum = %lu, fs = %s\n", 620 ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt); 621 panic("ffs_valloc: dup alloc"); 622 } 623 if (ip->i_blocks) { /* XXX */ 624 printf("free inode %s/%lu had %ld blocks\n", 625 fs->fs_fsmnt, (u_long)ino, (long)ip->i_blocks); 626 ip->i_blocks = 0; 627 } 628 ip->i_flags = 0; 629 /* 630 * Set up a new generation number for this inode. 631 */ 632 if (ip->i_gen == 0 || ++ip->i_gen == 0) 633 ip->i_gen = random() / 2 + 1; 634 return (0); 635 noinodes: 636 ffs_fserr(fs, cred->cr_uid, "out of inodes"); 637 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); 638 return (ENOSPC); 639 } 640 641 /* 642 * Find a cylinder group to place a directory. 643 * 644 * The policy implemented by this algorithm is to allocate a 645 * directory inode in the same cylinder group as its parent 646 * directory, but also to reserve space for its files inodes 647 * and data. Restrict the number of directories which may be 648 * allocated one after another in the same cylinder group 649 * without intervening allocation of files. 650 * 651 * If we allocate a first level directory then force allocation 652 * in another cylinder group. 653 */ 654 static ino_t 655 ffs_dirpref(pip) 656 struct inode *pip; 657 { 658 struct fs *fs; 659 int cg, prefcg, dirsize, cgsize; 660 int avgifree, avgbfree, avgndir, curdirsize; 661 int minifree, minbfree, maxndir; 662 int mincg, minndir; 663 int maxcontigdirs; 664 665 fs = pip->i_fs; 666 667 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; 668 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 669 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg; 670 671 /* 672 * Force allocation in another cg if creating a first level dir. 673 */ 674 if (ITOV(pip)->v_flag & VROOT) { 675 prefcg = arc4random() % fs->fs_ncg; 676 mincg = prefcg; 677 minndir = fs->fs_ipg; 678 for (cg = prefcg; cg < fs->fs_ncg; cg++) 679 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 680 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 681 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 682 mincg = cg; 683 minndir = fs->fs_cs(fs, cg).cs_ndir; 684 } 685 for (cg = 0; cg < prefcg; cg++) 686 if (fs->fs_cs(fs, cg).cs_ndir < minndir && 687 fs->fs_cs(fs, cg).cs_nifree >= avgifree && 688 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 689 mincg = cg; 690 minndir = fs->fs_cs(fs, cg).cs_ndir; 691 } 692 return ((ino_t)(fs->fs_ipg * mincg)); 693 } 694 695 /* 696 * Count various limits which used for 697 * optimal allocation of a directory inode. 698 */ 699 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg); 700 minifree = avgifree - fs->fs_ipg / 4; 701 if (minifree < 0) 702 minifree = 0; 703 minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4; 704 if (minbfree < 0) 705 minbfree = 0; 706 cgsize = fs->fs_fsize * fs->fs_fpg; 707 dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir; 708 curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0; 709 if (dirsize < curdirsize) 710 dirsize = curdirsize; 711 maxcontigdirs = min(cgsize / dirsize, 255); 712 if (fs->fs_avgfpdir > 0) 713 maxcontigdirs = min(maxcontigdirs, 714 fs->fs_ipg / fs->fs_avgfpdir); 715 if (maxcontigdirs == 0) 716 maxcontigdirs = 1; 717 718 /* 719 * Limit number of dirs in one cg and reserve space for 720 * regular files, but only if we have no deficit in 721 * inodes or space. 722 */ 723 prefcg = ino_to_cg(fs, pip->i_number); 724 for (cg = prefcg; cg < fs->fs_ncg; cg++) 725 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 726 fs->fs_cs(fs, cg).cs_nifree >= minifree && 727 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 728 if (fs->fs_contigdirs[cg] < maxcontigdirs) 729 return ((ino_t)(fs->fs_ipg * cg)); 730 } 731 for (cg = 0; cg < prefcg; cg++) 732 if (fs->fs_cs(fs, cg).cs_ndir < maxndir && 733 fs->fs_cs(fs, cg).cs_nifree >= minifree && 734 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { 735 if (fs->fs_contigdirs[cg] < maxcontigdirs) 736 return ((ino_t)(fs->fs_ipg * cg)); 737 } 738 /* 739 * This is a backstop when we have deficit in space. 740 */ 741 for (cg = prefcg; cg < fs->fs_ncg; cg++) 742 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 743 return ((ino_t)(fs->fs_ipg * cg)); 744 for (cg = 0; cg < prefcg; cg++) 745 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) 746 break; 747 return ((ino_t)(fs->fs_ipg * cg)); 748 } 749 750 /* 751 * Select the desired position for the next block in a file. The file is 752 * logically divided into sections. The first section is composed of the 753 * direct blocks. Each additional section contains fs_maxbpg blocks. 754 * 755 * If no blocks have been allocated in the first section, the policy is to 756 * request a block in the same cylinder group as the inode that describes 757 * the file. If no blocks have been allocated in any other section, the 758 * policy is to place the section in a cylinder group with a greater than 759 * average number of free blocks. An appropriate cylinder group is found 760 * by using a rotor that sweeps the cylinder groups. When a new group of 761 * blocks is needed, the sweep begins in the cylinder group following the 762 * cylinder group from which the previous allocation was made. The sweep 763 * continues until a cylinder group with greater than the average number 764 * of free blocks is found. If the allocation is for the first block in an 765 * indirect block, the information on the previous allocation is unavailable; 766 * here a best guess is made based upon the logical block number being 767 * allocated. 768 * 769 * If a section is already partially allocated, the policy is to 770 * contiguously allocate fs_maxcontig blocks. The end of one of these 771 * contiguous blocks and the beginning of the next is physically separated 772 * so that the disk head will be in transit between them for at least 773 * fs_rotdelay milliseconds. This is to allow time for the processor to 774 * schedule another I/O transfer. 775 */ 776 ufs_daddr_t 777 ffs_blkpref(ip, lbn, indx, bap) 778 struct inode *ip; 779 ufs_daddr_t lbn; 780 int indx; 781 ufs_daddr_t *bap; 782 { 783 struct fs *fs; 784 int cg; 785 int avgbfree, startcg; 786 ufs_daddr_t nextblk; 787 788 fs = ip->i_fs; 789 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { 790 if (lbn < NDADDR + NINDIR(fs)) { 791 cg = ino_to_cg(fs, ip->i_number); 792 return (fs->fs_fpg * cg + fs->fs_frag); 793 } 794 /* 795 * Find a cylinder with greater than average number of 796 * unused data blocks. 797 */ 798 if (indx == 0 || bap[indx - 1] == 0) 799 startcg = 800 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; 801 else 802 startcg = dtog(fs, bap[indx - 1]) + 1; 803 startcg %= fs->fs_ncg; 804 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; 805 for (cg = startcg; cg < fs->fs_ncg; cg++) 806 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 807 fs->fs_cgrotor = cg; 808 return (fs->fs_fpg * cg + fs->fs_frag); 809 } 810 for (cg = 0; cg <= startcg; cg++) 811 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { 812 fs->fs_cgrotor = cg; 813 return (fs->fs_fpg * cg + fs->fs_frag); 814 } 815 return (0); 816 } 817 /* 818 * One or more previous blocks have been laid out. If less 819 * than fs_maxcontig previous blocks are contiguous, the 820 * next block is requested contiguously, otherwise it is 821 * requested rotationally delayed by fs_rotdelay milliseconds. 822 */ 823 nextblk = bap[indx - 1] + fs->fs_frag; 824 if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig || 825 bap[indx - fs->fs_maxcontig] + 826 blkstofrags(fs, fs->fs_maxcontig) != nextblk) 827 return (nextblk); 828 /* 829 * Here we convert ms of delay to frags as: 830 * (frags) = (ms) * (rev/sec) * (sect/rev) / 831 * ((sect/frag) * (ms/sec)) 832 * then round up to the next block. 833 */ 834 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / 835 (NSPF(fs) * 1000), fs->fs_frag); 836 return (nextblk); 837 } 838 839 /* 840 * Implement the cylinder overflow algorithm. 841 * 842 * The policy implemented by this algorithm is: 843 * 1) allocate the block in its requested cylinder group. 844 * 2) quadradically rehash on the cylinder group number. 845 * 3) brute force search for a free block. 846 */ 847 /*VARARGS5*/ 848 static u_long 849 ffs_hashalloc(ip, cg, pref, size, allocator) 850 struct inode *ip; 851 int cg; 852 long pref; 853 int size; /* size for data blocks, mode for inodes */ 854 allocfcn_t *allocator; 855 { 856 struct fs *fs; 857 long result; /* XXX why not same type as we return? */ 858 int i, icg = cg; 859 860 fs = ip->i_fs; 861 /* 862 * 1: preferred cylinder group 863 */ 864 result = (*allocator)(ip, cg, pref, size); 865 if (result) 866 return (result); 867 /* 868 * 2: quadratic rehash 869 */ 870 for (i = 1; i < fs->fs_ncg; i *= 2) { 871 cg += i; 872 if (cg >= fs->fs_ncg) 873 cg -= fs->fs_ncg; 874 result = (*allocator)(ip, cg, 0, size); 875 if (result) 876 return (result); 877 } 878 /* 879 * 3: brute force search 880 * Note that we start at i == 2, since 0 was checked initially, 881 * and 1 is always checked in the quadratic rehash. 882 */ 883 cg = (icg + 2) % fs->fs_ncg; 884 for (i = 2; i < fs->fs_ncg; i++) { 885 result = (*allocator)(ip, cg, 0, size); 886 if (result) 887 return (result); 888 cg++; 889 if (cg == fs->fs_ncg) 890 cg = 0; 891 } 892 return (0); 893 } 894 895 /* 896 * Determine whether a fragment can be extended. 897 * 898 * Check to see if the necessary fragments are available, and 899 * if they are, allocate them. 900 */ 901 static ufs_daddr_t 902 ffs_fragextend(ip, cg, bprev, osize, nsize) 903 struct inode *ip; 904 int cg; 905 long bprev; 906 int osize, nsize; 907 { 908 struct fs *fs; 909 struct cg *cgp; 910 struct buf *bp; 911 long bno; 912 int frags, bbase; 913 int i, error; 914 u_int8_t *blksfree; 915 916 fs = ip->i_fs; 917 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) 918 return (0); 919 frags = numfrags(fs, nsize); 920 bbase = fragnum(fs, bprev); 921 if (bbase > fragnum(fs, (bprev + frags - 1))) { 922 /* cannot extend across a block boundary */ 923 return (0); 924 } 925 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 926 (int)fs->fs_cgsize, &bp); 927 if (error) { 928 brelse(bp); 929 return (0); 930 } 931 cgp = (struct cg *)bp->b_data; 932 if (!cg_chkmagic(cgp)) { 933 brelse(bp); 934 return (0); 935 } 936 bp->b_xflags |= BX_BKGRDWRITE; 937 cgp->cg_time = time_second; 938 bno = dtogd(fs, bprev); 939 blksfree = cg_blksfree(cgp); 940 for (i = numfrags(fs, osize); i < frags; i++) 941 if (isclr(blksfree, bno + i)) { 942 brelse(bp); 943 return (0); 944 } 945 /* 946 * the current fragment can be extended 947 * deduct the count on fragment being extended into 948 * increase the count on the remaining fragment (if any) 949 * allocate the extended piece 950 */ 951 for (i = frags; i < fs->fs_frag - bbase; i++) 952 if (isclr(blksfree, bno + i)) 953 break; 954 cgp->cg_frsum[i - numfrags(fs, osize)]--; 955 if (i != frags) 956 cgp->cg_frsum[i - frags]++; 957 for (i = numfrags(fs, osize); i < frags; i++) { 958 clrbit(blksfree, bno + i); 959 cgp->cg_cs.cs_nffree--; 960 fs->fs_cstotal.cs_nffree--; 961 fs->fs_cs(fs, cg).cs_nffree--; 962 } 963 fs->fs_fmod = 1; 964 if (DOINGSOFTDEP(ITOV(ip))) 965 softdep_setup_blkmapdep(bp, fs, bprev); 966 bdwrite(bp); 967 return (bprev); 968 } 969 970 /* 971 * Determine whether a block can be allocated. 972 * 973 * Check to see if a block of the appropriate size is available, 974 * and if it is, allocate it. 975 */ 976 static ufs_daddr_t 977 ffs_alloccg(ip, cg, bpref, size) 978 struct inode *ip; 979 int cg; 980 ufs_daddr_t bpref; 981 int size; 982 { 983 struct fs *fs; 984 struct cg *cgp; 985 struct buf *bp; 986 int i; 987 ufs_daddr_t bno, blkno; 988 int allocsiz, error, frags; 989 u_int8_t *blksfree; 990 991 fs = ip->i_fs; 992 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) 993 return (0); 994 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 995 (int)fs->fs_cgsize, &bp); 996 if (error) { 997 brelse(bp); 998 return (0); 999 } 1000 cgp = (struct cg *)bp->b_data; 1001 if (!cg_chkmagic(cgp) || 1002 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { 1003 brelse(bp); 1004 return (0); 1005 } 1006 bp->b_xflags |= BX_BKGRDWRITE; 1007 cgp->cg_time = time_second; 1008 if (size == fs->fs_bsize) { 1009 bno = ffs_alloccgblk(ip, bp, bpref); 1010 bdwrite(bp); 1011 return (bno); 1012 } 1013 /* 1014 * check to see if any fragments are already available 1015 * allocsiz is the size which will be allocated, hacking 1016 * it down to a smaller size if necessary 1017 */ 1018 blksfree = cg_blksfree(cgp); 1019 frags = numfrags(fs, size); 1020 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) 1021 if (cgp->cg_frsum[allocsiz] != 0) 1022 break; 1023 if (allocsiz == fs->fs_frag) { 1024 /* 1025 * no fragments were available, so a block will be 1026 * allocated, and hacked up 1027 */ 1028 if (cgp->cg_cs.cs_nbfree == 0) { 1029 brelse(bp); 1030 return (0); 1031 } 1032 bno = ffs_alloccgblk(ip, bp, bpref); 1033 bpref = dtogd(fs, bno); 1034 for (i = frags; i < fs->fs_frag; i++) 1035 setbit(blksfree, bpref + i); 1036 i = fs->fs_frag - frags; 1037 cgp->cg_cs.cs_nffree += i; 1038 fs->fs_cstotal.cs_nffree += i; 1039 fs->fs_cs(fs, cg).cs_nffree += i; 1040 fs->fs_fmod = 1; 1041 cgp->cg_frsum[i]++; 1042 bdwrite(bp); 1043 return (bno); 1044 } 1045 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); 1046 if (bno < 0) { 1047 brelse(bp); 1048 return (0); 1049 } 1050 for (i = 0; i < frags; i++) 1051 clrbit(blksfree, bno + i); 1052 cgp->cg_cs.cs_nffree -= frags; 1053 fs->fs_cstotal.cs_nffree -= frags; 1054 fs->fs_cs(fs, cg).cs_nffree -= frags; 1055 fs->fs_fmod = 1; 1056 cgp->cg_frsum[allocsiz]--; 1057 if (frags != allocsiz) 1058 cgp->cg_frsum[allocsiz - frags]++; 1059 blkno = cg * fs->fs_fpg + bno; 1060 if (DOINGSOFTDEP(ITOV(ip))) 1061 softdep_setup_blkmapdep(bp, fs, blkno); 1062 bdwrite(bp); 1063 return ((u_long)blkno); 1064 } 1065 1066 /* 1067 * Allocate a block in a cylinder group. 1068 * 1069 * This algorithm implements the following policy: 1070 * 1) allocate the requested block. 1071 * 2) allocate a rotationally optimal block in the same cylinder. 1072 * 3) allocate the next available block on the block rotor for the 1073 * specified cylinder group. 1074 * Note that this routine only allocates fs_bsize blocks; these 1075 * blocks may be fragmented by the routine that allocates them. 1076 */ 1077 static ufs_daddr_t 1078 ffs_alloccgblk(ip, bp, bpref) 1079 struct inode *ip; 1080 struct buf *bp; 1081 ufs_daddr_t bpref; 1082 { 1083 struct fs *fs; 1084 struct cg *cgp; 1085 ufs_daddr_t bno, blkno; 1086 int cylno, pos, delta; 1087 short *cylbp; 1088 int i; 1089 u_int8_t *blksfree; 1090 1091 fs = ip->i_fs; 1092 cgp = (struct cg *)bp->b_data; 1093 blksfree = cg_blksfree(cgp); 1094 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { 1095 bpref = cgp->cg_rotor; 1096 goto norot; 1097 } 1098 bpref = blknum(fs, bpref); 1099 bpref = dtogd(fs, bpref); 1100 /* 1101 * if the requested block is available, use it 1102 */ 1103 if (ffs_isblock(fs, blksfree, fragstoblks(fs, bpref))) { 1104 bno = bpref; 1105 goto gotit; 1106 } 1107 if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) { 1108 /* 1109 * Block layout information is not available. 1110 * Leaving bpref unchanged means we take the 1111 * next available free block following the one 1112 * we just allocated. Hopefully this will at 1113 * least hit a track cache on drives of unknown 1114 * geometry (e.g. SCSI). 1115 */ 1116 goto norot; 1117 } 1118 /* 1119 * check for a block available on the same cylinder 1120 */ 1121 cylno = cbtocylno(fs, bpref); 1122 if (cg_blktot(cgp)[cylno] == 0) 1123 goto norot; 1124 /* 1125 * check the summary information to see if a block is 1126 * available in the requested cylinder starting at the 1127 * requested rotational position and proceeding around. 1128 */ 1129 cylbp = cg_blks(fs, cgp, cylno); 1130 pos = cbtorpos(fs, bpref); 1131 for (i = pos; i < fs->fs_nrpos; i++) 1132 if (cylbp[i] > 0) 1133 break; 1134 if (i == fs->fs_nrpos) 1135 for (i = 0; i < pos; i++) 1136 if (cylbp[i] > 0) 1137 break; 1138 if (cylbp[i] > 0) { 1139 /* 1140 * found a rotational position, now find the actual 1141 * block. A panic if none is actually there. 1142 */ 1143 pos = cylno % fs->fs_cpc; 1144 bno = (cylno - pos) * fs->fs_spc / NSPB(fs); 1145 if (fs_postbl(fs, pos)[i] == -1) { 1146 printf("pos = %d, i = %d, fs = %s\n", 1147 pos, i, fs->fs_fsmnt); 1148 panic("ffs_alloccgblk: cyl groups corrupted"); 1149 } 1150 for (i = fs_postbl(fs, pos)[i];; ) { 1151 if (ffs_isblock(fs, blksfree, bno + i)) { 1152 bno = blkstofrags(fs, (bno + i)); 1153 goto gotit; 1154 } 1155 delta = fs_rotbl(fs)[i]; 1156 if (delta <= 0 || 1157 delta + i > fragstoblks(fs, fs->fs_fpg)) 1158 break; 1159 i += delta; 1160 } 1161 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); 1162 panic("ffs_alloccgblk: can't find blk in cyl"); 1163 } 1164 norot: 1165 /* 1166 * no blocks in the requested cylinder, so take next 1167 * available one in this cylinder group. 1168 */ 1169 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); 1170 if (bno < 0) 1171 return (0); 1172 cgp->cg_rotor = bno; 1173 gotit: 1174 blkno = fragstoblks(fs, bno); 1175 ffs_clrblock(fs, blksfree, (long)blkno); 1176 ffs_clusteracct(fs, cgp, blkno, -1); 1177 cgp->cg_cs.cs_nbfree--; 1178 fs->fs_cstotal.cs_nbfree--; 1179 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; 1180 cylno = cbtocylno(fs, bno); 1181 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; 1182 cg_blktot(cgp)[cylno]--; 1183 fs->fs_fmod = 1; 1184 blkno = cgp->cg_cgx * fs->fs_fpg + bno; 1185 if (DOINGSOFTDEP(ITOV(ip))) 1186 softdep_setup_blkmapdep(bp, fs, blkno); 1187 return (blkno); 1188 } 1189 1190 /* 1191 * Determine whether a cluster can be allocated. 1192 * 1193 * We do not currently check for optimal rotational layout if there 1194 * are multiple choices in the same cylinder group. Instead we just 1195 * take the first one that we find following bpref. 1196 */ 1197 static ufs_daddr_t 1198 ffs_clusteralloc(ip, cg, bpref, len) 1199 struct inode *ip; 1200 int cg; 1201 ufs_daddr_t bpref; 1202 int len; 1203 { 1204 struct fs *fs; 1205 struct cg *cgp; 1206 struct buf *bp; 1207 int i, got, run, bno, bit, map; 1208 u_char *mapp; 1209 int32_t *lp; 1210 u_int8_t *blksfree; 1211 1212 fs = ip->i_fs; 1213 if (fs->fs_maxcluster[cg] < len) 1214 return (0); 1215 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, 1216 &bp)) 1217 goto fail; 1218 cgp = (struct cg *)bp->b_data; 1219 if (!cg_chkmagic(cgp)) 1220 goto fail; 1221 bp->b_xflags |= BX_BKGRDWRITE; 1222 /* 1223 * Check to see if a cluster of the needed size (or bigger) is 1224 * available in this cylinder group. 1225 */ 1226 lp = &cg_clustersum(cgp)[len]; 1227 for (i = len; i <= fs->fs_contigsumsize; i++) 1228 if (*lp++ > 0) 1229 break; 1230 if (i > fs->fs_contigsumsize) { 1231 /* 1232 * This is the first time looking for a cluster in this 1233 * cylinder group. Update the cluster summary information 1234 * to reflect the true maximum sized cluster so that 1235 * future cluster allocation requests can avoid reading 1236 * the cylinder group map only to find no clusters. 1237 */ 1238 lp = &cg_clustersum(cgp)[len - 1]; 1239 for (i = len - 1; i > 0; i--) 1240 if (*lp-- > 0) 1241 break; 1242 fs->fs_maxcluster[cg] = i; 1243 goto fail; 1244 } 1245 /* 1246 * Search the cluster map to find a big enough cluster. 1247 * We take the first one that we find, even if it is larger 1248 * than we need as we prefer to get one close to the previous 1249 * block allocation. We do not search before the current 1250 * preference point as we do not want to allocate a block 1251 * that is allocated before the previous one (as we will 1252 * then have to wait for another pass of the elevator 1253 * algorithm before it will be read). We prefer to fail and 1254 * be recalled to try an allocation in the next cylinder group. 1255 */ 1256 if (dtog(fs, bpref) != cg) 1257 bpref = 0; 1258 else 1259 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); 1260 mapp = &cg_clustersfree(cgp)[bpref / NBBY]; 1261 map = *mapp++; 1262 bit = 1 << (bpref % NBBY); 1263 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { 1264 if ((map & bit) == 0) { 1265 run = 0; 1266 } else { 1267 run++; 1268 if (run == len) 1269 break; 1270 } 1271 if ((got & (NBBY - 1)) != (NBBY - 1)) { 1272 bit <<= 1; 1273 } else { 1274 map = *mapp++; 1275 bit = 1; 1276 } 1277 } 1278 if (got >= cgp->cg_nclusterblks) 1279 goto fail; 1280 /* 1281 * Allocate the cluster that we have found. 1282 */ 1283 blksfree = cg_blksfree(cgp); 1284 for (i = 1; i <= len; i++) 1285 if (!ffs_isblock(fs, blksfree, got - run + i)) 1286 panic("ffs_clusteralloc: map mismatch"); 1287 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); 1288 if (dtog(fs, bno) != cg) 1289 panic("ffs_clusteralloc: allocated out of group"); 1290 len = blkstofrags(fs, len); 1291 for (i = 0; i < len; i += fs->fs_frag) 1292 if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) 1293 panic("ffs_clusteralloc: lost block"); 1294 bdwrite(bp); 1295 return (bno); 1296 1297 fail: 1298 brelse(bp); 1299 return (0); 1300 } 1301 1302 /* 1303 * Determine whether an inode can be allocated. 1304 * 1305 * Check to see if an inode is available, and if it is, 1306 * allocate it using the following policy: 1307 * 1) allocate the requested inode. 1308 * 2) allocate the next available inode after the requested 1309 * inode in the specified cylinder group. 1310 */ 1311 static ino_t 1312 ffs_nodealloccg(ip, cg, ipref, mode) 1313 struct inode *ip; 1314 int cg; 1315 ufs_daddr_t ipref; 1316 int mode; 1317 { 1318 struct fs *fs; 1319 struct cg *cgp; 1320 struct buf *bp; 1321 u_int8_t *inosused; 1322 int error, start, len, loc, map, i; 1323 1324 fs = ip->i_fs; 1325 if (fs->fs_cs(fs, cg).cs_nifree == 0) 1326 return (0); 1327 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1328 (int)fs->fs_cgsize, &bp); 1329 if (error) { 1330 brelse(bp); 1331 return (0); 1332 } 1333 cgp = (struct cg *)bp->b_data; 1334 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { 1335 brelse(bp); 1336 return (0); 1337 } 1338 bp->b_xflags |= BX_BKGRDWRITE; 1339 cgp->cg_time = time_second; 1340 inosused = cg_inosused(cgp); 1341 if (ipref) { 1342 ipref %= fs->fs_ipg; 1343 if (isclr(inosused, ipref)) 1344 goto gotit; 1345 } 1346 start = cgp->cg_irotor / NBBY; 1347 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); 1348 loc = skpc(0xff, len, &inosused[start]); 1349 if (loc == 0) { 1350 len = start + 1; 1351 start = 0; 1352 loc = skpc(0xff, len, &inosused[0]); 1353 if (loc == 0) { 1354 printf("cg = %d, irotor = %ld, fs = %s\n", 1355 cg, (long)cgp->cg_irotor, fs->fs_fsmnt); 1356 panic("ffs_nodealloccg: map corrupted"); 1357 /* NOTREACHED */ 1358 } 1359 } 1360 i = start + len - loc; 1361 map = inosused[i]; 1362 ipref = i * NBBY; 1363 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 1364 if ((map & i) == 0) { 1365 cgp->cg_irotor = ipref; 1366 goto gotit; 1367 } 1368 } 1369 printf("fs = %s\n", fs->fs_fsmnt); 1370 panic("ffs_nodealloccg: block not in map"); 1371 /* NOTREACHED */ 1372 gotit: 1373 if (DOINGSOFTDEP(ITOV(ip))) 1374 softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); 1375 setbit(inosused, ipref); 1376 cgp->cg_cs.cs_nifree--; 1377 fs->fs_cstotal.cs_nifree--; 1378 fs->fs_cs(fs, cg).cs_nifree--; 1379 fs->fs_fmod = 1; 1380 if ((mode & IFMT) == IFDIR) { 1381 cgp->cg_cs.cs_ndir++; 1382 fs->fs_cstotal.cs_ndir++; 1383 fs->fs_cs(fs, cg).cs_ndir++; 1384 } 1385 bdwrite(bp); 1386 return (cg * fs->fs_ipg + ipref); 1387 } 1388 1389 /* 1390 * Free a block or fragment. 1391 * 1392 * The specified block or fragment is placed back in the 1393 * free map. If a fragment is deallocated, a possible 1394 * block reassembly is checked. 1395 */ 1396 void 1397 ffs_blkfree(ip, bno, size) 1398 struct inode *ip; 1399 ufs_daddr_t bno; 1400 long size; 1401 { 1402 struct fs *fs; 1403 struct cg *cgp; 1404 struct buf *bp; 1405 ufs_daddr_t blkno; 1406 int i, error, cg, blk, frags, bbase; 1407 u_int8_t *blksfree; 1408 1409 fs = ip->i_fs; 1410 VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size); 1411 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || 1412 fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { 1413 printf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n", 1414 devtoname(ip->i_dev), (long)bno, (long)fs->fs_bsize, size, 1415 fs->fs_fsmnt); 1416 panic("ffs_blkfree: bad size"); 1417 } 1418 cg = dtog(fs, bno); 1419 if ((u_int)bno >= fs->fs_size) { 1420 printf("bad block %ld, ino %lu\n", 1421 (long)bno, (u_long)ip->i_number); 1422 ffs_fserr(fs, ip->i_uid, "bad block"); 1423 return; 1424 } 1425 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1426 (int)fs->fs_cgsize, &bp); 1427 if (error) { 1428 brelse(bp); 1429 return; 1430 } 1431 cgp = (struct cg *)bp->b_data; 1432 if (!cg_chkmagic(cgp)) { 1433 brelse(bp); 1434 return; 1435 } 1436 bp->b_xflags |= BX_BKGRDWRITE; 1437 cgp->cg_time = time_second; 1438 bno = dtogd(fs, bno); 1439 blksfree = cg_blksfree(cgp); 1440 if (size == fs->fs_bsize) { 1441 blkno = fragstoblks(fs, bno); 1442 if (!ffs_isfreeblock(fs, blksfree, blkno)) { 1443 printf("dev = %s, block = %ld, fs = %s\n", 1444 devtoname(ip->i_dev), (long)bno, fs->fs_fsmnt); 1445 panic("ffs_blkfree: freeing free block"); 1446 } 1447 ffs_setblock(fs, blksfree, blkno); 1448 ffs_clusteracct(fs, cgp, blkno, 1); 1449 cgp->cg_cs.cs_nbfree++; 1450 fs->fs_cstotal.cs_nbfree++; 1451 fs->fs_cs(fs, cg).cs_nbfree++; 1452 i = cbtocylno(fs, bno); 1453 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; 1454 cg_blktot(cgp)[i]++; 1455 } else { 1456 bbase = bno - fragnum(fs, bno); 1457 /* 1458 * decrement the counts associated with the old frags 1459 */ 1460 blk = blkmap(fs, blksfree, bbase); 1461 ffs_fragacct(fs, blk, cgp->cg_frsum, -1); 1462 /* 1463 * deallocate the fragment 1464 */ 1465 frags = numfrags(fs, size); 1466 for (i = 0; i < frags; i++) { 1467 if (isset(blksfree, bno + i)) { 1468 printf("dev = %s, block = %ld, fs = %s\n", 1469 devtoname(ip->i_dev), (long)(bno + i), 1470 fs->fs_fsmnt); 1471 panic("ffs_blkfree: freeing free frag"); 1472 } 1473 setbit(blksfree, bno + i); 1474 } 1475 cgp->cg_cs.cs_nffree += i; 1476 fs->fs_cstotal.cs_nffree += i; 1477 fs->fs_cs(fs, cg).cs_nffree += i; 1478 /* 1479 * add back in counts associated with the new frags 1480 */ 1481 blk = blkmap(fs, blksfree, bbase); 1482 ffs_fragacct(fs, blk, cgp->cg_frsum, 1); 1483 /* 1484 * if a complete block has been reassembled, account for it 1485 */ 1486 blkno = fragstoblks(fs, bbase); 1487 if (ffs_isblock(fs, blksfree, blkno)) { 1488 cgp->cg_cs.cs_nffree -= fs->fs_frag; 1489 fs->fs_cstotal.cs_nffree -= fs->fs_frag; 1490 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; 1491 ffs_clusteracct(fs, cgp, blkno, 1); 1492 cgp->cg_cs.cs_nbfree++; 1493 fs->fs_cstotal.cs_nbfree++; 1494 fs->fs_cs(fs, cg).cs_nbfree++; 1495 i = cbtocylno(fs, bbase); 1496 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; 1497 cg_blktot(cgp)[i]++; 1498 } 1499 } 1500 fs->fs_fmod = 1; 1501 bdwrite(bp); 1502 } 1503 1504 #ifdef DIAGNOSTIC 1505 /* 1506 * Verify allocation of a block or fragment. Returns true if block or 1507 * fragment is allocated, false if it is free. 1508 */ 1509 static int 1510 ffs_checkblk(ip, bno, size) 1511 struct inode *ip; 1512 ufs_daddr_t bno; 1513 long size; 1514 { 1515 struct fs *fs; 1516 struct cg *cgp; 1517 struct buf *bp; 1518 int i, error, frags, free; 1519 u_int8_t *blksfree; 1520 1521 fs = ip->i_fs; 1522 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { 1523 printf("bsize = %ld, size = %ld, fs = %s\n", 1524 (long)fs->fs_bsize, size, fs->fs_fsmnt); 1525 panic("ffs_checkblk: bad size"); 1526 } 1527 if ((u_int)bno >= fs->fs_size) 1528 panic("ffs_checkblk: bad block %d", bno); 1529 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), 1530 (int)fs->fs_cgsize, &bp); 1531 if (error) 1532 panic("ffs_checkblk: cg bread failed"); 1533 cgp = (struct cg *)bp->b_data; 1534 if (!cg_chkmagic(cgp)) 1535 panic("ffs_checkblk: cg magic mismatch"); 1536 bp->b_xflags |= BX_BKGRDWRITE; 1537 blksfree = cg_blksfree(cgp); 1538 bno = dtogd(fs, bno); 1539 if (size == fs->fs_bsize) { 1540 free = ffs_isblock(fs, blksfree, fragstoblks(fs, bno)); 1541 } else { 1542 frags = numfrags(fs, size); 1543 for (free = 0, i = 0; i < frags; i++) 1544 if (isset(blksfree, bno + i)) 1545 free++; 1546 if (free != 0 && free != frags) 1547 panic("ffs_checkblk: partially free fragment"); 1548 } 1549 brelse(bp); 1550 return (!free); 1551 } 1552 #endif /* DIAGNOSTIC */ 1553 1554 /* 1555 * Free an inode. 1556 */ 1557 int 1558 ffs_vfree( pvp, ino, mode) 1559 struct vnode *pvp; 1560 ino_t ino; 1561 int mode; 1562 { 1563 if (DOINGSOFTDEP(pvp)) { 1564 softdep_freefile(pvp, ino, mode); 1565 return (0); 1566 } 1567 return (ffs_freefile(pvp, ino, mode)); 1568 } 1569 1570 /* 1571 * Do the actual free operation. 1572 * The specified inode is placed back in the free map. 1573 */ 1574 int 1575 ffs_freefile( pvp, ino, mode) 1576 struct vnode *pvp; 1577 ino_t ino; 1578 int mode; 1579 { 1580 struct fs *fs; 1581 struct cg *cgp; 1582 struct inode *pip; 1583 struct buf *bp; 1584 int error, cg; 1585 u_int8_t *inosused; 1586 1587 pip = VTOI(pvp); 1588 fs = pip->i_fs; 1589 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) 1590 panic("ffs_vfree: range: dev = (%d,%d), ino = %d, fs = %s", 1591 major(pip->i_dev), minor(pip->i_dev), ino, fs->fs_fsmnt); 1592 cg = ino_to_cg(fs, ino); 1593 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), 1594 (int)fs->fs_cgsize, &bp); 1595 if (error) { 1596 brelse(bp); 1597 return (error); 1598 } 1599 cgp = (struct cg *)bp->b_data; 1600 if (!cg_chkmagic(cgp)) { 1601 brelse(bp); 1602 return (0); 1603 } 1604 bp->b_xflags |= BX_BKGRDWRITE; 1605 cgp->cg_time = time_second; 1606 inosused = cg_inosused(cgp); 1607 ino %= fs->fs_ipg; 1608 if (isclr(inosused, ino)) { 1609 printf("dev = %s, ino = %lu, fs = %s\n", 1610 devtoname(pip->i_dev), (u_long)ino, fs->fs_fsmnt); 1611 if (fs->fs_ronly == 0) 1612 panic("ffs_vfree: freeing free inode"); 1613 } 1614 clrbit(inosused, ino); 1615 if (ino < cgp->cg_irotor) 1616 cgp->cg_irotor = ino; 1617 cgp->cg_cs.cs_nifree++; 1618 fs->fs_cstotal.cs_nifree++; 1619 fs->fs_cs(fs, cg).cs_nifree++; 1620 if ((mode & IFMT) == IFDIR) { 1621 cgp->cg_cs.cs_ndir--; 1622 fs->fs_cstotal.cs_ndir--; 1623 fs->fs_cs(fs, cg).cs_ndir--; 1624 } 1625 fs->fs_fmod = 1; 1626 bdwrite(bp); 1627 return (0); 1628 } 1629 1630 /* 1631 * Find a block of the specified size in the specified cylinder group. 1632 * 1633 * It is a panic if a request is made to find a block if none are 1634 * available. 1635 */ 1636 static ufs_daddr_t 1637 ffs_mapsearch(fs, cgp, bpref, allocsiz) 1638 struct fs *fs; 1639 struct cg *cgp; 1640 ufs_daddr_t bpref; 1641 int allocsiz; 1642 { 1643 ufs_daddr_t bno; 1644 int start, len, loc, i; 1645 int blk, field, subfield, pos; 1646 u_int8_t *blksfree; 1647 1648 /* 1649 * find the fragment by searching through the free block 1650 * map for an appropriate bit pattern 1651 */ 1652 if (bpref) 1653 start = dtogd(fs, bpref) / NBBY; 1654 else 1655 start = cgp->cg_frotor / NBBY; 1656 blksfree = cg_blksfree(cgp); 1657 len = howmany(fs->fs_fpg, NBBY) - start; 1658 loc = scanc((u_int)len, (u_char *)&blksfree[start], 1659 (u_char *)fragtbl[fs->fs_frag], 1660 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 1661 if (loc == 0) { 1662 len = start + 1; 1663 start = 0; 1664 loc = scanc((u_int)len, (u_char *)&blksfree[0], 1665 (u_char *)fragtbl[fs->fs_frag], 1666 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); 1667 if (loc == 0) { 1668 printf("start = %d, len = %d, fs = %s\n", 1669 start, len, fs->fs_fsmnt); 1670 panic("ffs_alloccg: map corrupted"); 1671 /* NOTREACHED */ 1672 } 1673 } 1674 bno = (start + len - loc) * NBBY; 1675 cgp->cg_frotor = bno; 1676 /* 1677 * found the byte in the map 1678 * sift through the bits to find the selected frag 1679 */ 1680 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { 1681 blk = blkmap(fs, blksfree, bno); 1682 blk <<= 1; 1683 field = around[allocsiz]; 1684 subfield = inside[allocsiz]; 1685 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { 1686 if ((blk & field) == subfield) 1687 return (bno + pos); 1688 field <<= 1; 1689 subfield <<= 1; 1690 } 1691 } 1692 printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); 1693 panic("ffs_alloccg: block not in map"); 1694 return (-1); 1695 } 1696 1697 /* 1698 * Update the cluster map because of an allocation or free. 1699 * 1700 * Cnt == 1 means free; cnt == -1 means allocating. 1701 */ 1702 static void 1703 ffs_clusteracct(fs, cgp, blkno, cnt) 1704 struct fs *fs; 1705 struct cg *cgp; 1706 ufs_daddr_t blkno; 1707 int cnt; 1708 { 1709 int32_t *sump; 1710 int32_t *lp; 1711 u_char *freemapp, *mapp; 1712 int i, start, end, forw, back, map, bit; 1713 1714 if (fs->fs_contigsumsize <= 0) 1715 return; 1716 freemapp = cg_clustersfree(cgp); 1717 sump = cg_clustersum(cgp); 1718 /* 1719 * Allocate or clear the actual block. 1720 */ 1721 if (cnt > 0) 1722 setbit(freemapp, blkno); 1723 else 1724 clrbit(freemapp, blkno); 1725 /* 1726 * Find the size of the cluster going forward. 1727 */ 1728 start = blkno + 1; 1729 end = start + fs->fs_contigsumsize; 1730 if (end >= cgp->cg_nclusterblks) 1731 end = cgp->cg_nclusterblks; 1732 mapp = &freemapp[start / NBBY]; 1733 map = *mapp++; 1734 bit = 1 << (start % NBBY); 1735 for (i = start; i < end; i++) { 1736 if ((map & bit) == 0) 1737 break; 1738 if ((i & (NBBY - 1)) != (NBBY - 1)) { 1739 bit <<= 1; 1740 } else { 1741 map = *mapp++; 1742 bit = 1; 1743 } 1744 } 1745 forw = i - start; 1746 /* 1747 * Find the size of the cluster going backward. 1748 */ 1749 start = blkno - 1; 1750 end = start - fs->fs_contigsumsize; 1751 if (end < 0) 1752 end = -1; 1753 mapp = &freemapp[start / NBBY]; 1754 map = *mapp--; 1755 bit = 1 << (start % NBBY); 1756 for (i = start; i > end; i--) { 1757 if ((map & bit) == 0) 1758 break; 1759 if ((i & (NBBY - 1)) != 0) { 1760 bit >>= 1; 1761 } else { 1762 map = *mapp--; 1763 bit = 1 << (NBBY - 1); 1764 } 1765 } 1766 back = start - i; 1767 /* 1768 * Account for old cluster and the possibly new forward and 1769 * back clusters. 1770 */ 1771 i = back + forw + 1; 1772 if (i > fs->fs_contigsumsize) 1773 i = fs->fs_contigsumsize; 1774 sump[i] += cnt; 1775 if (back > 0) 1776 sump[back] -= cnt; 1777 if (forw > 0) 1778 sump[forw] -= cnt; 1779 /* 1780 * Update cluster summary information. 1781 */ 1782 lp = &sump[fs->fs_contigsumsize]; 1783 for (i = fs->fs_contigsumsize; i > 0; i--) 1784 if (*lp-- > 0) 1785 break; 1786 fs->fs_maxcluster[cgp->cg_cgx] = i; 1787 } 1788 1789 /* 1790 * Fserr prints the name of a file system with an error diagnostic. 1791 * 1792 * The form of the error message is: 1793 * fs: error message 1794 */ 1795 static void 1796 ffs_fserr(fs, uid, cp) 1797 struct fs *fs; 1798 u_int uid; 1799 char *cp; 1800 { 1801 struct thread *td = curthread; 1802 struct proc *p; 1803 1804 if ((p = td->td_proc) != NULL) { 1805 log(LOG_ERR, "pid %d (%s), uid %d on %s: %s\n", p ? p->p_pid : -1, 1806 p ? p->p_comm : "-", uid, fs->fs_fsmnt, cp); 1807 } else { 1808 log(LOG_ERR, "system thread %p, uid %d on %s: %s\n", 1809 td, uid, fs->fs_fsmnt, cp); 1810 } 1811 } 1812