/*	$OpenBSD: ffs_alloc.c,v 1.15 2022/01/11 05:34:33 jsg Exp $	*/
/*	$NetBSD: ffs_alloc.c,v 1.29 2016/06/24 19:24:11 christos Exp $	*/
/* From: NetBSD: ffs_alloc.c,v 1.50 2001/09/06 02:16:01 lukem Exp */

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.19 (Berkeley) 7/13/95
 */

#include <sys/param.h>	/* DEV_BSIZE setbit clrbit NBBY howmany */
#include <sys/types.h>

#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>

#include "ffs/buf.h"
#include "ffs/ufs_inode.h"
#include "ffs/ffs_extern.h"

#include <err.h>	/* errx */
#include <errno.h>

static int scanc(u_int, const u_char *, const u_char *, int);

static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int);
static daddr_t ffs_alloccgblk(struct inode *, struct mkfsbuf *, daddr_t);
static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int,
		    daddr_t (*)(struct inode *, int, daddr_t, int));
static int32_t ffs_mapsearch(struct fs *, struct cg *, daddr_t, int);

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
int
ffs_alloc(struct inode *ip, daddr_t lbn __unused, daddr_t bpref, int size,
    daddr_t *bnp)
{
	struct fs *fs = ip->i_fs;
	daddr_t bno;
	int cg;

	*bnp = 0;
	if (size > fs->fs_bsize || fragoff(fs, size) != 0) {
		errx(1, "%s: bad size: bsize %d size %d", __func__,
		    fs->fs_bsize, size);
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg);
	if (bno > 0) {
		DIP_ADD(ip, blocks, size / DEV_BSIZE);
		*bnp = bno;
		return (0);
	}
nospace:
	return (ENOSPC);
}
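
/*
 * Example (hypothetical caller, not part of the original sources): a
 * block-mapping routine in the style of ffs_balloc() would typically
 * compute a preference with ffs_blkpref_ufs1() below and request one
 * full block, treating a non-zero return as out-of-space:
 *
 *	daddr_t newb;
 *	int error;
 *
 *	error = ffs_alloc(ip, lbn,
 *	    ffs_blkpref_ufs1(ip, lbn, 0, &ip->i_ffs1_db[0]),
 *	    ip->i_fs->fs_bsize, &newb);
 *	if (error)
 *		return (error);		// ENOSPC is the only failure
 *
 * The `lbn' argument is unused here (hence __unused above); it is
 * presumably retained so the signature matches the kernel allocator.
 */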

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds. This is to allow time for the processor to
 * schedule another I/O transfer.
 */
/* XXX ondisk32 */
daddr_t
ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, int32_t *bap)
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1] + 1);
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
				return (fs->fs_fpg * cg + fs->fs_frag);
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
				return (fs->fs_fpg * cg + fs->fs_frag);
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return bap[indx - 1] + fs->fs_frag;
}
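
/*
 * A minimal sketch (not part of the original code) of the wrap-around
 * sweep that the two loops above implement: starting at `startcg', take
 * the first cylinder group whose free-block count reaches the
 * file-system average:
 *
 *	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
 *	for (i = 0; i < fs->fs_ncg; i++) {
 *		cg = (startcg + i) % fs->fs_ncg;
 *		if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree)
 *			return (fs->fs_fpg * cg + fs->fs_frag);
 *	}
 *	return (0);
 *
 * The two loops in the function body unroll exactly this scan.
 */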

daddr_t
ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int64_t *bap)
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1] + 1);
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg < startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return bap[indx - 1] + fs->fs_frag;
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 *
 * `size': size for data blocks, mode for inodes
 */
/*VARARGS5*/
static daddr_t
ffs_hashalloc(struct inode *ip, int cg, daddr_t pref, int size,
    daddr_t (*allocator)(struct inode *, int, daddr_t, int))
{
	struct fs *fs;
	daddr_t result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}
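
/*
 * Worked example (illustrative numbers only): with fs_ncg = 16 and a
 * preferred cylinder group of 5, step 1 tries cg 5; step 2 adds
 * i = 1, 2, 4, 8 cumulatively, probing cgs 6, 8, 12 and 4 (20 wraps
 * to 4); step 3 then walks cgs 7, 8, ..., 15, 0, ..., 4 starting at
 * (5 + 2) % 16 = 7.  Between the three steps every cylinder group is
 * visited at least once before the allocation is declared a failure.
 */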

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static daddr_t
ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
{
	struct cg *cgp;
	struct mkfsbuf *bp;
	daddr_t bno, blkno;
	int error, frags, allocsiz, i;
	struct fs *fs = ip->i_fs;

	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, 0, &bp);
	if (error) {
		return (0);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp, 0);
		return (0);
	}
	if (size == fs->fs_bsize) {
		bno = ffs_alloccgblk(ip, bp, bpref);
		bwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp, 0);
			return (0);
		}
		bno = ffs_alloccgblk(ip, bp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		cgp->cg_frsum[i] += 1;
		bdwrite(bp);
		return (bno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	cgp->cg_frsum[allocsiz] -= 1;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags] += 1;
	blkno = cg * fs->fs_fpg + bno;
	bdwrite(bp);
	return (blkno);
}
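
/*
 * Example of the fragment accounting above (illustrative only): asking
 * for frags = 3 fragments when the smallest suitable free run is
 * allocsiz = 5 fragments long consumes that run and leaves a run of 2,
 * so cg_frsum[5] is decremented, cg_frsum[5 - 3] = cg_frsum[2] is
 * incremented, and the three nffree counters each drop by 3.
 */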

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
static daddr_t
ffs_alloccgblk(struct inode *ip, struct mkfsbuf *bp, daddr_t bpref)
{
	struct cg *cgp;
	daddr_t blkno;
	int32_t bno;
	struct fs *fs = ip->i_fs;
	u_int8_t *blksfree;

	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp);
	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
		bpref = cgp->cg_rotor;
	} else {
		bpref = blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
			goto gotit;
	}
	/*
	 * Take the next available one in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = bno;
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, (long)blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	fs->fs_fmod = 1;
	blkno = cgp->cg_cgx * fs->fs_fpg + bno;
	return (blkno);
}

static int
scanc(u_int size, const u_char *cp, const u_char table[], int mask)
{
	const u_char *end = &cp[size];

	while (cp < end && (table[*cp] & mask) == 0)
		cp++;
	return (end - cp);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
static int32_t
ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
{
	int32_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;
	int ostart, olen;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	ostart = start;
	olen = len;
	loc = scanc((u_int)len,
	    (const u_char *)&cg_blksfree(cgp)[start],
	    (const u_char *)fragtbl[fs->fs_frag],
	    (1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len,
		    (const u_char *)&cg_blksfree(cgp)[0],
		    (const u_char *)fragtbl[fs->fs_frag],
		    (1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			errx(1, "%s: map corrupted: start %d "
			    "len %d offset %d %ld", __func__, ostart, olen,
			    cgp->cg_freeoff,
			    (long)cg_blksfree(cgp) - (long)cgp);
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	errx(1, "%s: block not in map: bno %lld", __func__, (long long)bno);
	return (-1);
}
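
/*
 * Example of the scanc() convention above (hypothetical values):
 * scanc() returns the number of bytes remaining from the first byte
 * whose table entry matches `mask', or 0 when no byte matches.  With
 *
 *	static const u_char tbl[256] = { ['x'] = 0x01 };
 *	const u_char buf[] = "abcxdef";
 *
 * scanc(7, buf, tbl, 0x01) returns 4, since "xdef" remains once the
 * scan stops at 'x'.  ffs_mapsearch() uses this with fragtbl[] to find
 * the first free-map byte that can hold a run of `allocsiz' fragments;
 * `start + len - loc' then converts that remainder back into a byte
 * index.
 */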