1 /* $OpenBSD: ext2fs_alloc.c,v 1.12 2003/07/06 05:24:16 tedu Exp $ */ 2 /* $NetBSD: ext2fs_alloc.c,v 1.10 2001/07/05 08:38:27 toshii Exp $ */ 3 4 /* 5 * Copyright (c) 1997 Manuel Bouyer. 6 * Copyright (c) 1982, 1986, 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_alloc.c 8.11 (Berkeley) 10/27/94 34 * Modified for ext2fs by Manuel Bouyer. 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/buf.h> 40 #include <sys/proc.h> 41 #include <sys/vnode.h> 42 #include <sys/mount.h> 43 #include <sys/kernel.h> 44 #include <sys/syslog.h> 45 46 #include <ufs/ufs/quota.h> 47 #include <ufs/ufs/inode.h> 48 #include <ufs/ufs/ufs_extern.h> 49 50 #include <ufs/ext2fs/ext2fs.h> 51 #include <ufs/ext2fs/ext2fs_extern.h> 52 53 u_long ext2gennumber; 54 55 static ufs1_daddr_t ext2fs_alloccg(struct inode *, int, ufs1_daddr_t, int); 56 static u_long ext2fs_dirpref(struct m_ext2fs *); 57 static void ext2fs_fserr(struct m_ext2fs *, u_int, char *); 58 static u_long ext2fs_hashalloc(struct inode *, int, long, int, 59 ufs1_daddr_t (*)(struct inode *, int, ufs1_daddr_t, int)); 60 static ufs1_daddr_t ext2fs_nodealloccg(struct inode *, int, ufs1_daddr_t, int); 61 static ufs1_daddr_t ext2fs_mapsearch(struct m_ext2fs *, char *, ufs1_daddr_t); 62 63 /* 64 * Allocate a block in the file system. 65 * 66 * A preference may be optionally specified. If a preference is given 67 * the following hierarchy is used to allocate a block: 68 * 1) allocate the requested block. 69 * 2) allocate a rotationally optimal block in the same cylinder. 70 * 3) allocate a block in the same cylinder group. 71 * 4) quadradically rehash into other cylinder groups, until an 72 * available block is located. 73 * If no block preference is given the following heirarchy is used 74 * to allocate a block: 75 * 1) allocate a block in the cylinder group that contains the 76 * inode for the file. 77 * 2) quadradically rehash into other cylinder groups, until an 78 * available block is located. 79 */ 80 int 81 ext2fs_alloc(ip, lbn, bpref, cred, bnp) 82 struct inode *ip; 83 ufs1_daddr_t lbn, bpref; 84 struct ucred *cred; 85 ufs1_daddr_t *bnp; 86 { 87 struct m_ext2fs *fs; 88 ufs1_daddr_t bno; 89 int cg; 90 91 *bnp = 0; 92 fs = ip->i_e2fs; 93 #ifdef DIAGNOSTIC 94 if (cred == NOCRED) 95 panic("ext2fs_alloc: missing credential"); 96 #endif /* DIAGNOSTIC */ 97 if (fs->e2fs.e2fs_fbcount == 0) 98 goto nospace; 99 if (cred->cr_uid != 0 && freespace(fs) <= 0) 100 goto nospace; 101 if (bpref >= fs->e2fs.e2fs_bcount) 102 bpref = 0; 103 if (bpref == 0) 104 cg = ino_to_cg(fs, ip->i_number); 105 else 106 cg = dtog(fs, bpref); 107 bno = (ufs1_daddr_t)ext2fs_hashalloc(ip, cg, bpref, fs->e2fs_bsize, 108 ext2fs_alloccg); 109 if (bno > 0) { 110 ip->i_e2fs_nblock += btodb(fs->e2fs_bsize); 111 ip->i_flag |= IN_CHANGE | IN_UPDATE; 112 *bnp = bno; 113 return (0); 114 } 115 nospace: 116 ext2fs_fserr(fs, cred->cr_uid, "file system full"); 117 uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt); 118 return (ENOSPC); 119 } 120 121 /* 122 * Allocate an inode in the file system. 123 * 124 * If allocating a directory, use ext2fs_dirpref to select the inode. 125 * If allocating in a directory, the following hierarchy is followed: 126 * 1) allocate the preferred inode. 127 * 2) allocate an inode in the same cylinder group. 128 * 3) quadradically rehash into other cylinder groups, until an 129 * available inode is located. 130 * If no inode preference is given the following heirarchy is used 131 * to allocate an inode: 132 * 1) allocate an inode in cylinder group 0. 133 * 2) quadradically rehash into other cylinder groups, until an 134 * available inode is located. 135 */ 136 int 137 ext2fs_inode_alloc(struct inode *pip, int mode, struct ucred *cred, 138 struct vnode **vpp) 139 { 140 struct vnode *pvp; 141 struct m_ext2fs *fs; 142 struct inode *ip; 143 ino_t ino, ipref; 144 int cg, error; 145 146 *vpp = NULL; 147 pvp = ITOV(pip); 148 fs = pip->i_e2fs; 149 if (fs->e2fs.e2fs_ficount == 0) 150 goto noinodes; 151 152 if ((mode & IFMT) == IFDIR) 153 cg = ext2fs_dirpref(fs); 154 else 155 cg = ino_to_cg(fs, pip->i_number); 156 ipref = cg * fs->e2fs.e2fs_ipg + 1; 157 ino = (ino_t)ext2fs_hashalloc(pip, cg, (long)ipref, mode, ext2fs_nodealloccg); 158 if (ino == 0) 159 goto noinodes; 160 error = VFS_VGET(pvp->v_mount, ino, vpp); 161 if (error) { 162 ext2fs_inode_free(pip, ino, mode); 163 return (error); 164 } 165 ip = VTOI(*vpp); 166 if (ip->i_e2fs_mode && ip->i_e2fs_nlink != 0) { 167 printf("mode = 0%o, nlinks %d, inum = %d, fs = %s\n", 168 ip->i_e2fs_mode, ip->i_e2fs_nlink, ip->i_number, fs->e2fs_fsmnt); 169 panic("ext2fs_valloc: dup alloc"); 170 } 171 172 bzero(&(ip->i_din.e2fs_din), sizeof(struct ext2fs_dinode)); 173 174 /* 175 * Set up a new generation number for this inode. 176 */ 177 if (++ext2gennumber < (u_long)time.tv_sec) 178 ext2gennumber = time.tv_sec; 179 ip->i_e2fs_gen = ext2gennumber; 180 return (0); 181 noinodes: 182 ext2fs_fserr(fs, cred->cr_uid, "out of inodes"); 183 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt); 184 return (ENOSPC); 185 } 186 187 /* 188 * Find a cylinder to place a directory. 189 * 190 * The policy implemented by this algorithm is to select from 191 * among those cylinder groups with above the average number of 192 * free inodes, the one with the smallest number of directories. 193 */ 194 static u_long 195 ext2fs_dirpref(fs) 196 struct m_ext2fs *fs; 197 { 198 int cg, maxspace, mincg, avgifree; 199 200 avgifree = fs->e2fs.e2fs_ficount / fs->e2fs_ncg; 201 maxspace = 0; 202 mincg = -1; 203 for (cg = 0; cg < fs->e2fs_ncg; cg++) 204 if ( fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) { 205 if (mincg == -1 || fs->e2fs_gd[cg].ext2bgd_nbfree > maxspace) { 206 mincg = cg; 207 maxspace = fs->e2fs_gd[cg].ext2bgd_nbfree; 208 } 209 } 210 return mincg; 211 } 212 213 /* 214 * Select the desired position for the next block in a file. The file is 215 * logically divided into sections. The first section is composed of the 216 * direct blocks. Each additional section contains fs_maxbpg blocks. 217 * 218 * If no blocks have been allocated in the first section, the policy is to 219 * request a block in the same cylinder group as the inode that describes 220 * the file. Otherwise, the policy is to try to allocate the blocks 221 * contigously. The two fields of the ext2 inode extension (see 222 * ufs/ufs/inode.h) help this. 223 */ 224 ufs1_daddr_t 225 ext2fs_blkpref(ip, lbn, indx, bap) 226 struct inode *ip; 227 ufs1_daddr_t lbn; 228 int indx; 229 ufs1_daddr_t *bap; 230 { 231 struct m_ext2fs *fs; 232 int cg, i; 233 234 fs = ip->i_e2fs; 235 /* 236 * if we are doing contigous lbn allocation, try to alloc blocks 237 * contigously on disk 238 */ 239 240 if ( ip->i_e2fs_last_blk && lbn == ip->i_e2fs_last_lblk + 1) { 241 return ip->i_e2fs_last_blk + 1; 242 } 243 244 /* 245 * bap, if provided, gives us a list of blocks to which we want to 246 * stay close 247 */ 248 249 if (bap) { 250 for (i = indx; i >= 0 ; i--) { 251 if (bap[i]) { 252 return fs2h32(bap[i]) + 1; 253 } 254 } 255 } 256 257 /* fall back to the first block of the cylinder containing the inode */ 258 259 cg = ino_to_cg(fs, ip->i_number); 260 return fs->e2fs.e2fs_bpg * cg + fs->e2fs.e2fs_first_dblock + 1; 261 } 262 263 /* 264 * Implement the cylinder overflow algorithm. 265 * 266 * The policy implemented by this algorithm is: 267 * 1) allocate the block in its requested cylinder group. 268 * 2) quadradically rehash on the cylinder group number. 269 * 3) brute force search for a free block. 270 */ 271 static u_long 272 ext2fs_hashalloc(ip, cg, pref, size, allocator) 273 struct inode *ip; 274 int cg; 275 long pref; 276 int size; /* size for data blocks, mode for inodes */ 277 ufs1_daddr_t (*allocator)(struct inode *, int, ufs1_daddr_t, int); 278 { 279 struct m_ext2fs *fs; 280 long result; 281 int i, icg = cg; 282 283 fs = ip->i_e2fs; 284 /* 285 * 1: preferred cylinder group 286 */ 287 result = (*allocator)(ip, cg, pref, size); 288 if (result) 289 return (result); 290 /* 291 * 2: quadratic rehash 292 */ 293 for (i = 1; i < fs->e2fs_ncg; i *= 2) { 294 cg += i; 295 if (cg >= fs->e2fs_ncg) 296 cg -= fs->e2fs_ncg; 297 result = (*allocator)(ip, cg, 0, size); 298 if (result) 299 return (result); 300 } 301 /* 302 * 3: brute force search 303 * Note that we start at i == 2, since 0 was checked initially, 304 * and 1 is always checked in the quadratic rehash. 305 */ 306 cg = (icg + 2) % fs->e2fs_ncg; 307 for (i = 2; i < fs->e2fs_ncg; i++) { 308 result = (*allocator)(ip, cg, 0, size); 309 if (result) 310 return (result); 311 cg++; 312 if (cg == fs->e2fs_ncg) 313 cg = 0; 314 } 315 return (0); 316 } 317 318 /* 319 * Determine whether a block can be allocated. 320 * 321 * Check to see if a block of the appropriate size is available, 322 * and if it is, allocate it. 323 */ 324 325 static ufs1_daddr_t 326 ext2fs_alloccg(ip, cg, bpref, size) 327 struct inode *ip; 328 int cg; 329 ufs1_daddr_t bpref; 330 int size; 331 { 332 struct m_ext2fs *fs; 333 char *bbp; 334 struct buf *bp; 335 int error, bno, start, end, loc; 336 337 fs = ip->i_e2fs; 338 if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) 339 return (0); 340 error = bread(ip->i_devvp, fsbtodb(fs, 341 fs->e2fs_gd[cg].ext2bgd_b_bitmap), 342 (int)fs->e2fs_bsize, NOCRED, &bp); 343 if (error) { 344 brelse(bp); 345 return (0); 346 } 347 bbp = (char *)bp->b_data; 348 349 if (dtog(fs, bpref) != cg) 350 bpref = 0; 351 if (bpref != 0) { 352 bpref = dtogd(fs, bpref); 353 /* 354 * if the requested block is available, use it 355 */ 356 if (isclr(bbp, bpref)) { 357 bno = bpref; 358 goto gotit; 359 } 360 } 361 /* 362 * no blocks in the requested cylinder, so take next 363 * available one in this cylinder group. 364 * first try to get 8 contigous blocks, then fall back to a single 365 * block. 366 */ 367 if (bpref) 368 start = dtogd(fs, bpref) / NBBY; 369 else 370 start = 0; 371 end = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 372 for (loc = start; loc < end; loc++) { 373 if (bbp[loc] == 0) { 374 bno = loc * NBBY; 375 goto gotit; 376 } 377 } 378 for (loc = 0; loc < start; loc++) { 379 if (bbp[loc] == 0) { 380 bno = loc * NBBY; 381 goto gotit; 382 } 383 } 384 385 bno = ext2fs_mapsearch(fs, bbp, bpref); 386 if (bno < 0) 387 return (0); 388 gotit: 389 #ifdef DIAGNOSTIC 390 if (isset(bbp, (long)bno)) { 391 printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n", 392 cg, bno, fs->e2fs_fsmnt); 393 panic("ext2fs_alloccg: dup alloc"); 394 } 395 #endif 396 setbit(bbp, (long)bno); 397 fs->e2fs.e2fs_fbcount--; 398 fs->e2fs_gd[cg].ext2bgd_nbfree--; 399 fs->e2fs_fmod = 1; 400 bdwrite(bp); 401 return (cg * fs->e2fs.e2fs_fpg + fs->e2fs.e2fs_first_dblock + bno); 402 } 403 404 /* 405 * Determine whether an inode can be allocated. 406 * 407 * Check to see if an inode is available, and if it is, 408 * allocate it using the following policy: 409 * 1) allocate the requested inode. 410 * 2) allocate the next available inode after the requested 411 * inode in the specified cylinder group. 412 */ 413 static ufs1_daddr_t 414 ext2fs_nodealloccg(ip, cg, ipref, mode) 415 struct inode *ip; 416 int cg; 417 ufs1_daddr_t ipref; 418 int mode; 419 { 420 struct m_ext2fs *fs; 421 char *ibp; 422 struct buf *bp; 423 int error, start, len, loc, map, i; 424 425 ipref--; /* to avoid a lot of (ipref -1) */ 426 fs = ip->i_e2fs; 427 if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) 428 return (0); 429 error = bread(ip->i_devvp, fsbtodb(fs, 430 fs->e2fs_gd[cg].ext2bgd_i_bitmap), 431 (int)fs->e2fs_bsize, NOCRED, &bp); 432 if (error) { 433 brelse(bp); 434 return (0); 435 } 436 ibp = (char *)bp->b_data; 437 if (ipref) { 438 ipref %= fs->e2fs.e2fs_ipg; 439 if (isclr(ibp, ipref)) 440 goto gotit; 441 } 442 start = ipref / NBBY; 443 len = howmany(fs->e2fs.e2fs_ipg - ipref, NBBY); 444 loc = skpc(0xff, len, &ibp[start]); 445 if (loc == 0) { 446 len = start + 1; 447 start = 0; 448 loc = skpc(0xff, len, &ibp[0]); 449 if (loc == 0) { 450 printf("cg = %d, ipref = %d, fs = %s\n", 451 cg, ipref, fs->e2fs_fsmnt); 452 panic("ext2fs_nodealloccg: map corrupted"); 453 /* NOTREACHED */ 454 } 455 } 456 i = start + len - loc; 457 map = ibp[i]; 458 ipref = i * NBBY; 459 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { 460 if ((map & i) == 0) { 461 goto gotit; 462 } 463 } 464 printf("fs = %s\n", fs->e2fs_fsmnt); 465 panic("ext2fs_nodealloccg: block not in map"); 466 /* NOTREACHED */ 467 gotit: 468 setbit(ibp, ipref); 469 fs->e2fs.e2fs_ficount--; 470 fs->e2fs_gd[cg].ext2bgd_nifree--; 471 fs->e2fs_fmod = 1; 472 if ((mode & IFMT) == IFDIR) { 473 fs->e2fs_gd[cg].ext2bgd_ndirs++; 474 } 475 bdwrite(bp); 476 return (cg * fs->e2fs.e2fs_ipg + ipref +1); 477 } 478 479 /* 480 * Free a block. 481 * 482 * The specified block is placed back in the 483 * free map. 484 */ 485 void 486 ext2fs_blkfree(ip, bno) 487 struct inode *ip; 488 ufs1_daddr_t bno; 489 { 490 struct m_ext2fs *fs; 491 char *bbp; 492 struct buf *bp; 493 int error, cg; 494 495 fs = ip->i_e2fs; 496 cg = dtog(fs, bno); 497 if ((u_int)bno >= fs->e2fs.e2fs_bcount) { 498 printf("bad block %d, ino %d\n", bno, ip->i_number); 499 ext2fs_fserr(fs, ip->i_e2fs_uid, "bad block"); 500 return; 501 } 502 error = bread(ip->i_devvp, 503 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap), 504 (int)fs->e2fs_bsize, NOCRED, &bp); 505 if (error) { 506 brelse(bp); 507 return; 508 } 509 bbp = (char *)bp->b_data; 510 bno = dtogd(fs, bno); 511 if (isclr(bbp, bno)) { 512 printf("dev = 0x%x, block = %d, fs = %s\n", 513 ip->i_dev, bno, fs->e2fs_fsmnt); 514 panic("blkfree: freeing free block"); 515 } 516 clrbit(bbp, bno); 517 fs->e2fs.e2fs_fbcount++; 518 fs->e2fs_gd[cg].ext2bgd_nbfree++; 519 520 fs->e2fs_fmod = 1; 521 bdwrite(bp); 522 } 523 524 /* 525 * Free an inode. 526 * 527 * The specified inode is placed back in the free map. 528 */ 529 int 530 ext2fs_inode_free(struct inode *pip, ino_t ino, int mode) 531 { 532 register struct m_ext2fs *fs; 533 register char *ibp; 534 struct buf *bp; 535 int error, cg; 536 537 fs = pip->i_e2fs; 538 if ((u_int)ino > fs->e2fs.e2fs_icount || (u_int)ino < EXT2_FIRSTINO) 539 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s", 540 pip->i_dev, ino, fs->e2fs_fsmnt); 541 cg = ino_to_cg(fs, ino); 542 error = bread(pip->i_devvp, 543 fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap), 544 (int)fs->e2fs_bsize, NOCRED, &bp); 545 if (error) { 546 brelse(bp); 547 return (0); 548 } 549 ibp = (char *)bp->b_data; 550 ino = (ino - 1) % fs->e2fs.e2fs_ipg; 551 if (isclr(ibp, ino)) { 552 printf("dev = 0x%x, ino = %d, fs = %s\n", 553 pip->i_dev, ino, fs->e2fs_fsmnt); 554 if (fs->e2fs_ronly == 0) 555 panic("ifree: freeing free inode"); 556 } 557 clrbit(ibp, ino); 558 fs->e2fs.e2fs_ficount++; 559 fs->e2fs_gd[cg].ext2bgd_nifree++; 560 if ((mode & IFMT) == IFDIR) { 561 fs->e2fs_gd[cg].ext2bgd_ndirs--; 562 } 563 fs->e2fs_fmod = 1; 564 bdwrite(bp); 565 return (0); 566 } 567 568 /* 569 * Find a block in the specified cylinder group. 570 * 571 * It is a panic if a request is made to find a block if none are 572 * available. 573 */ 574 575 static ufs1_daddr_t 576 ext2fs_mapsearch(fs, bbp, bpref) 577 struct m_ext2fs *fs; 578 char *bbp; 579 ufs1_daddr_t bpref; 580 { 581 ufs1_daddr_t bno; 582 int start, len, loc, i, map; 583 584 /* 585 * find the fragment by searching through the free block 586 * map for an appropriate bit pattern 587 */ 588 if (bpref) 589 start = dtogd(fs, bpref) / NBBY; 590 else 591 start = 0; 592 len = howmany(fs->e2fs.e2fs_fpg, NBBY) - start; 593 loc = skpc(0xff, len, &bbp[start]); 594 if (loc == 0) { 595 len = start + 1; 596 start = 0; 597 loc = skpc(0xff, len, &bbp[start]); 598 if (loc == 0) { 599 printf("start = %d, len = %d, fs = %s\n", 600 start, len, fs->e2fs_fsmnt); 601 panic("ext2fs_alloccg: map corrupted"); 602 /* NOTREACHED */ 603 } 604 } 605 i = start + len - loc; 606 map = bbp[i]; 607 bno = i * NBBY; 608 for (i = 1; i < (1 << NBBY); i <<= 1, bno++) { 609 if ((map & i) == 0) 610 return (bno); 611 } 612 printf("fs = %s\n", fs->e2fs_fsmnt); 613 panic("ext2fs_mapsearch: block not in map"); 614 /* NOTREACHED */ 615 } 616 617 /* 618 * Fserr prints the name of a file system with an error diagnostic. 619 * 620 * The form of the error message is: 621 * fs: error message 622 */ 623 static void 624 ext2fs_fserr(fs, uid, cp) 625 struct m_ext2fs *fs; 626 u_int uid; 627 char *cp; 628 { 629 630 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->e2fs_fsmnt, cp); 631 } 632