/*	$NetBSD: ffs_alloc.c,v 1.125 2010/02/21 13:55:58 mlelstv Exp $	*/

/*-
 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.19 (Berkeley) 7/13/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.125 2010/02/21 13:55:58 mlelstv Exp $");

#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/fstrans.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/syslog.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>

#include <miscfs/specfs/specdev.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int, int);
static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int);
static ino_t ffs_dirpref(struct inode *);
static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
static void ffs_fserr(struct fs *, u_int, const char *);
static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int, int,
    daddr_t (*)(struct inode *, int, daddr_t, int, int));
static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int, int);
static int32_t ffs_mapsearch(struct fs *, struct cg *, daddr_t, int);
static void ffs_blkfree_common(struct ufsmount *, struct fs *, dev_t,
    struct buf *, daddr_t, long, bool);
static void ffs_freefile_common(struct ufsmount *, struct fs *, dev_t,
    struct buf *, ino_t, int, bool);

/* if 1, changes in optimization strategy are logged */
int ffs_log_changeopt = 0;

/* in ffs_tables.c */
extern const int inside[], around[];
extern const u_char * const fragtbl[];

/* Basic consistency check for block allocations */
static int
ffs_check_bad_allocation(const char *func, struct fs *fs, daddr_t bno,
    long size, dev_t dev, ino_t inum)
{
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
		printf("dev = 0x%llx, bno = %" PRId64 " bsize = %d, "
		    "size = %ld, fs = %s\n",
		    (long long)dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("%s: bad size", func);
	}

	if (bno >= fs->fs_size) {
		printf("bad block %" PRId64 ", ino %llu\n", bno,
		    (unsigned long long)inum);
		ffs_fserr(fs, inum, "bad block");
		return EINVAL;
	}
	return 0;
}
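
/*
 * Example (assuming the common 8 KB/1 KB configuration, i.e.
 * fs_bsize = 8192, fs_fsize = 1024, fs_frag = 8): a request with
 * size = 3072 covers numfrags(fs, 3072) = 3 fragments and has
 * fragoff(fs, 3072) = 0, so it passes the size check above, while a
 * request with size = 3000 panics as a "bad size" because it is not
 * a multiple of the fragment size.  The fragnum() term additionally
 * rejects ranges that would spill across a block boundary.
 */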

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 * 1) allocate the requested block.
 * 2) allocate a rotationally optimal block in the same cylinder.
 * 3) allocate a block in the same cylinder group.
 * 4) quadratically rehash into other cylinder groups, until an
 *    available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 * 1) allocate a block in the cylinder group that contains the
 *    inode for the file.
 * 2) quadratically rehash into other cylinder groups, until an
 *    available block is located.
 *
 * => called with um_lock held
 * => releases um_lock before returning
 */
int
ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, int flags,
    kauth_cred_t cred, daddr_t *bnp)
{
	struct ufsmount *ump;
	struct fs *fs;
	daddr_t bno;
	int cg;
#ifdef QUOTA
	int error;
#endif

	fs = ip->i_fs;
	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

#ifdef UVM_PAGE_TRKOWN
	if (ITOV(ip)->v_type == VREG &&
	    lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
		struct vm_page *pg;
		struct uvm_object *uobj = &ITOV(ip)->v_uobj;
		voff_t off = trunc_page(lblktosize(fs, lbn));
		voff_t endoff = round_page(lblktosize(fs, lbn) + size);

		mutex_enter(&uobj->vmobjlock);
		while (off < endoff) {
			pg = uvm_pagelookup(uobj, off);
			KASSERT(pg == NULL || pg->owner == curproc->p_pid);
			off += PAGE_SIZE;
		}
		mutex_exit(&uobj->vmobjlock);
	}
#endif

	*bnp = 0;
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%llx, bsize = %d, size = %d, fs = %s\n",
		    (unsigned long long)ip->i_dev, fs->fs_bsize, size,
		    fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* DIAGNOSTIC */
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (freespace(fs, fs->fs_minfree) <= 0 &&
	    kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
	    NULL, NULL) != 0)
		goto nospace;
#ifdef QUOTA
	mutex_exit(&ump->um_lock);
	if ((error = chkdq(ip, btodb(size), cred, 0)) != 0)
		return (error);
	mutex_enter(&ump->um_lock);
#endif

	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = ffs_hashalloc(ip, cg, bpref, size, flags, ffs_alloccg);
	if (bno > 0) {
		DIP_ADD(ip, blocks, btodb(size));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(size), cred, FORCE);
#endif
	if (flags & B_CONTIG) {
		/*
		 * XXX ump->um_lock handling is "suspect" at best.
		 * For the case where ffs_hashalloc() fails early
		 * in the B_CONTIG case we reach here with um_lock
		 * already unlocked, so we can't release it again
		 * like in the normal error path.  See kern/39206.
		 *
		 * Fail silently - it's up to our caller to report
		 * errors.
		 */
		return (ENOSPC);
	}
nospace:
	mutex_exit(&ump->um_lock);
	ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
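
/*
 * Minimal calling sketch (hypothetical fragment for illustration
 * only; the real callers live in ffs_balloc()).  The caller takes
 * um_lock, supplies a preference from ffs_blkpref_*(), and
 * ffs_alloc() drops the lock on its way out:
 *
 *	mutex_enter(&ump->um_lock);
 *	error = ffs_alloc(ip, lbn,
 *	    ffs_blkpref_ufs1(ip, lbn, indx, flags, &bap[0]),
 *	    fs->fs_bsize, flags, cred, &newb);
 *	if (error == 0)
 *		... lbn now maps to physical block newb ...
 */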

/*
 * Reallocate a fragment to a bigger size
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified.  The allocator attempts to extend
 * the original block.  Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 *
 * => called with um_lock held
 * => return with um_lock released
 */
int
ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
    int nsize, kauth_cred_t cred, struct buf **bpp, daddr_t *blknop)
{
	struct ufsmount *ump;
	struct fs *fs;
	struct buf *bp;
	int cg, request, error;
	daddr_t bprev, bno;

	fs = ip->i_fs;
	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

#ifdef UVM_PAGE_TRKOWN
	if (ITOV(ip)->v_type == VREG) {
		struct vm_page *pg;
		struct uvm_object *uobj = &ITOV(ip)->v_uobj;
		voff_t off = trunc_page(lblktosize(fs, lbprev));
		voff_t endoff = round_page(lblktosize(fs, lbprev) + osize);

		mutex_enter(&uobj->vmobjlock);
		while (off < endoff) {
			pg = uvm_pagelookup(uobj, off);
			KASSERT(pg == NULL || pg->owner == curproc->p_pid);
			KASSERT((pg->flags & PG_CLEAN) == 0);
			off += PAGE_SIZE;
		}
		mutex_exit(&uobj->vmobjlock);
	}
#endif

#ifdef DIAGNOSTIC
	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf(
		    "dev = 0x%llx, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    (unsigned long long)ip->i_dev, fs->fs_bsize, osize, nsize,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* DIAGNOSTIC */
	if (freespace(fs, fs->fs_minfree) <= 0 &&
	    kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
	    NULL, NULL) != 0) {
		mutex_exit(&ump->um_lock);
		goto nospace;
	}
	if (fs->fs_magic == FS_UFS2_MAGIC)
		bprev = ufs_rw64(ip->i_ffs2_db[lbprev], UFS_FSNEEDSWAP(fs));
	else
		bprev = ufs_rw32(ip->i_ffs1_db[lbprev], UFS_FSNEEDSWAP(fs));

	if (bprev == 0) {
		printf("dev = 0x%llx, bsize = %d, bprev = %" PRId64 ", fs = %s\n",
		    (unsigned long long)ip->i_dev, fs->fs_bsize, bprev,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	mutex_exit(&ump->um_lock);

	/*
	 * Allocate the extra space in the buffer.
	 */
	if (bpp != NULL &&
	    (error = bread(ITOV(ip), lbprev, osize, NOCRED, 0, &bp)) != 0) {
		brelse(bp, 0);
		return (error);
	}
#ifdef QUOTA
	if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) {
		if (bpp != NULL) {
			brelse(bp, 0);
		}
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	mutex_enter(&ump->um_lock);
	if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
		DIP_ADD(ip, blocks, btodb(nsize - osize));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;

		if (bpp != NULL) {
			if (bp->b_blkno != fsbtodb(fs, bno))
				panic("bad blockno");
			allocbuf(bp, nsize, 1);
			memset((char *)bp->b_data + osize, 0, nsize - osize);
			mutex_enter(bp->b_objlock);
			KASSERT(!cv_has_waiters(&bp->b_done));
			bp->b_oflags |= BO_DONE;
			mutex_exit(bp->b_objlock);
			*bpp = bp;
		}
		if (blknop != NULL) {
			*blknop = bno;
		}
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;

		if (ffs_log_changeopt) {
			log(LOG_NOTICE,
			    "%s: optimization changed from SPACE to TIME\n",
			    fs->fs_fsmnt);
		}

		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;

		if (ffs_log_changeopt) {
			log(LOG_NOTICE,
			    "%s: optimization changed from TIME to SPACE\n",
			    fs->fs_fsmnt);
		}

		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%llx, optim = %d, fs = %s\n",
		    (unsigned long long)ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = ffs_hashalloc(ip, cg, bpref, request, 0, ffs_alloccg);
	if (bno > 0) {
		if ((ip->i_ump->um_mountp->mnt_wapbl) &&
		    (ITOV(ip)->v_type != VREG)) {
			UFS_WAPBL_REGISTER_DEALLOCATION(
			    ip->i_ump->um_mountp, fsbtodb(fs, bprev),
			    osize);
		} else {
			ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
			    ip->i_number);
		}
		if (nsize < request) {
			if ((ip->i_ump->um_mountp->mnt_wapbl) &&
			    (ITOV(ip)->v_type != VREG)) {
				UFS_WAPBL_REGISTER_DEALLOCATION(
				    ip->i_ump->um_mountp,
				    fsbtodb(fs, (bno + numfrags(fs, nsize))),
				    request - nsize);
			} else
				ffs_blkfree(fs, ip->i_devvp,
				    bno + numfrags(fs, nsize),
				    (long)(request - nsize), ip->i_number);
		}
		DIP_ADD(ip, blocks, btodb(nsize - osize));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (bpp != NULL) {
			bp->b_blkno = fsbtodb(fs, bno);
			allocbuf(bp, nsize, 1);
			memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
			mutex_enter(bp->b_objlock);
			KASSERT(!cv_has_waiters(&bp->b_done));
			bp->b_oflags |= BO_DONE;
			mutex_exit(bp->b_objlock);
			*bpp = bp;
		}
		if (blknop != NULL) {
			*blknop = bno;
		}
		return (0);
	}
	mutex_exit(&ump->um_lock);

#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
#endif
	if (bpp != NULL) {
		brelse(bp, 0);
	}

nospace:
	/*
	 * no space available
	 */
	ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
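
/*
 * Worked example of the hysteresis above (illustrative numbers):
 * with fs_minfree = 5 and fs_dsize = 1,000,000 fragments, the
 * SPACE -> TIME switch happens once cs_nffree drops to
 * fs_dsize * fs_minfree / (2 * 100) = 25,000 free fragments or
 * fewer, while the TIME -> SPACE switch requires cs_nffree to climb
 * back to fs_dsize * (fs_minfree - 2) / 100 = 30,000.  The gap
 * between the two thresholds keeps the policy from oscillating.
 */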

/*
 * Allocate an inode in the file system.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 * 1) allocate the preferred inode.
 * 2) allocate an inode in the same cylinder group.
 * 3) quadratically rehash into other cylinder groups, until an
 *    available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 * 1) allocate an inode in cylinder group 0.
 * 2) quadratically rehash into other cylinder groups, until an
 *    available inode is located.
 *
 * => um_lock not held upon entry or return
 */
int
ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
    struct vnode **vpp)
{
	struct ufsmount *ump;
	struct inode *pip;
	struct fs *fs;
	struct inode *ip;
	struct timespec ts;
	ino_t ino, ipref;
	int cg, error;

	UFS_WAPBL_JUNLOCK_ASSERT(pvp->v_mount);

	*vpp = NULL;
	pip = VTOI(pvp);
	fs = pip->i_fs;
	ump = pip->i_ump;

	error = UFS_WAPBL_BEGIN(pvp->v_mount);
	if (error) {
		return error;
	}
	mutex_enter(&ump->um_lock);
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track the number of dirs created one after another
	 * in the same cg without intervening files.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	UFS_WAPBL_END(pvp->v_mount);
	error = VFS_VGET(pvp->v_mount, ino, vpp);
	if (error) {
		int err;
		err = UFS_WAPBL_BEGIN(pvp->v_mount);
		if (err == 0)
			ffs_vfree(pvp, ino, mode);
		if (err == 0)
			UFS_WAPBL_END(pvp->v_mount);
		return (error);
	}
	KASSERT((*vpp)->v_type == VNON);
	ip = VTOI(*vpp);
	if (ip->i_mode) {
#if 0
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
#else
		printf("dmode %x mode %x dgen %x gen %x\n",
		    DIP(ip, mode), ip->i_mode,
		    DIP(ip, gen), ip->i_gen);
		printf("size %llx blocks %llx\n",
		    (long long)DIP(ip, size), (long long)DIP(ip, blocks));
		printf("ino %llu ipref %llu\n", (unsigned long long)ino,
		    (unsigned long long)ipref);
#if 0
		error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
		    (int)fs->fs_bsize, NOCRED, 0, &bp);
#endif

#endif
		panic("ffs_valloc: dup alloc");
	}
	if (DIP(ip, blocks)) {				/* XXX */
		printf("free inode %s/%llu had %" PRId64 " blocks\n",
		    fs->fs_fsmnt, (unsigned long long)ino, DIP(ip, blocks));
		DIP_ASSIGN(ip, blocks, 0);
	}
	ip->i_flag &= ~IN_SPACECOUNTED;
	ip->i_flags = 0;
	DIP_ASSIGN(ip, flags, 0);
	/*
	 * Set up a new generation number for this inode.
	 */
	ip->i_gen++;
	DIP_ASSIGN(ip, gen, ip->i_gen);
	if (fs->fs_magic == FS_UFS2_MAGIC) {
		vfs_timestamp(&ts);
		ip->i_ffs2_birthtime = ts.tv_sec;
		ip->i_ffs2_birthnsec = ts.tv_nsec;
	}
	return (0);
noinodes:
	mutex_exit(&ump->um_lock);
	UFS_WAPBL_END(pvp->v_mount);
	ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}
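
/*
 * Note on fs_contigdirs[]: the per-cg counter updated in ffs_valloc()
 * saturates at 255 and is decremented by subsequent file creations,
 * so it approximates "directories created in this cg since the last
 * file".  ffs_dirpref() below compares it against maxcontigdirs to
 * spread bursts of mkdir(2) calls across cylinder groups.
 */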

/*
 * Find a cylinder group in which to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data.  Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
static ino_t
ffs_dirpref(struct inode *pip)
{
	register struct fs *fs;
	int cg, prefcg;
	int64_t dirsize, cgsize, curdsz;
	int avgifree, avgbfree, avgndir;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	KASSERT(mutex_owned(&pip->i_ump->um_lock));

	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = random() % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Count various limits which are used for
	 * optimal allocation of a directory inode.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - fs->fs_ipg / 4;
	if (minifree < 0)
		minifree = 0;
	minbfree = avgbfree - fragstoblks(fs, fs->fs_fpg) / 4;
	if (minbfree < 0)
		minbfree = 0;
	cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
	dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
	if (avgndir != 0) {
		curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
		if (dirsize < curdsz)
			dirsize = curdsz;
	}
	if (cgsize < dirsize * 255)
		maxcontigdirs = cgsize / dirsize;
	else
		maxcontigdirs = 255;
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
		    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop when we are deficient in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}
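
/*
 * Example of the maxcontigdirs computation (illustrative values,
 * assuming the historical defaults fs_avgfilesize = 16384 and
 * fs_avgfpdir = 64): an expected directory consumes about
 * dirsize = 16384 * 64 = 1 MB, so a 64 MB cylinder group caps
 * maxcontigdirs at 64 directories; the fs_ipg / fs_avgfpdir term
 * then further limits the run so it cannot exhaust the group's
 * inodes either.
 */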

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is laid out
 * contiguously if possible.
 *
 * => um_lock held on entry and exit
 */
daddr_t
ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, int flags,
    int32_t *bap /* XXX ondisk32 */)
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	KASSERT(mutex_owned(&ip->i_ump->um_lock));

	fs = ip->i_fs;

	/*
	 * If allocating a contiguous file with B_CONTIG, use the hints
	 * in the inode extensions to return the desired block.
	 *
	 * For metadata (indirect blocks) return the address of where
	 * the first indirect block resides - we'll scan for the next
	 * available slot if we need to allocate more than one indirect
	 * block.  For data, return the address of the actual block
	 * relative to the address of the first data block.
	 */
	if (flags & B_CONTIG) {
		KASSERT(ip->i_ffs_first_data_blk != 0);
		KASSERT(ip->i_ffs_first_indir_blk != 0);
		if (flags & B_METAONLY)
			return ip->i_ffs_first_indir_blk;
		else
			return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn);
	}

	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs,
			    ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg < startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
}
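
/*
 * Example of the section policy (illustrative numbers): with an 8 KB
 * block size and fs_maxbpg = 2048, a large file changes cylinder
 * groups every 2048 blocks (16 MB).  At each section boundary
 * (indx % fs_maxbpg == 0) the sweep starts at startcg and returns
 * the first group holding at least the per-group average of free
 * blocks; small files (lbn < NDADDR + NINDIR(fs)) simply stay in the
 * inode's own group.
 */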

daddr_t
ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int flags,
    int64_t *bap)
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	KASSERT(mutex_owned(&ip->i_ump->um_lock));

	fs = ip->i_fs;

	/*
	 * If allocating a contiguous file with B_CONTIG, use the hints
	 * in the inode extensions to return the desired block.
	 *
	 * For metadata (indirect blocks) return the address of where
	 * the first indirect block resides - we'll scan for the next
	 * available slot if we need to allocate more than one indirect
	 * block.  For data, return the address of the actual block
	 * relative to the address of the first data block.
	 */
	if (flags & B_CONTIG) {
		KASSERT(ip->i_ffs_first_data_blk != 0);
		KASSERT(ip->i_ffs_first_indir_blk != 0);
		if (flags & B_METAONLY)
			return ip->i_ffs_first_indir_blk;
		else
			return ip->i_ffs_first_data_blk + blkstofrags(fs, lbn);
	}

	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR + NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs,
			    ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg < startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
}


/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 * 1) allocate the block in its requested cylinder group.
 * 2) quadratically rehash on the cylinder group number.
 * 3) brute force search for a free block.
 *
 * => called with um_lock held
 * => returns with um_lock released on success, held on failure
 *    (*allocator releases lock on success, retains lock on failure)
 */
/*VARARGS5*/
static daddr_t
ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
    int size /* size for data blocks, mode for inodes */,
    int flags, daddr_t (*allocator)(struct inode *, int, daddr_t, int, int))
{
	struct fs *fs;
	daddr_t result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size, flags);
	if (result)
		return (result);

	if (flags & B_CONTIG)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size, flags);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size, flags);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}
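
/*
 * Probe sequence example for ffs_hashalloc() (illustrative): with
 * fs_ncg = 32 and a preferred group of 5, step 2 tries groups
 * 6, 8, 12, 20 and 4 (offsets 1, 3, 7, 15, 31 from the start,
 * mod 32), and step 3 then walks every remaining group starting at
 * (5 + 2) % 32 = 7.
 */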

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 *
 * => called with um_lock held
 * => returns with um_lock released on success, held on failure
 */
static daddr_t
ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
{
	struct ufsmount *ump;
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	daddr_t bno;
	int frags, bbase;
	int i, error;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (0);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	mutex_exit(&ump->um_lock);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
		goto fail;
	cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs));
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs));
	bno = dtogd(fs, bprev);
	blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(blksfree, bno + i))
			goto fail;
	/*
	 * The current fragment can be extended:
	 * deduct the count on the fragment being extended into,
	 * increase the count on the remaining fragment (if any),
	 * and allocate the extended piece.
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(blksfree, bno + i))
			break;
	ufs_add32(cgp->cg_frsum[i - numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
	if (i != frags)
		ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
	mutex_enter(&ump->um_lock);
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(blksfree, bno + i);
		ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
	bdwrite(bp);
	return (bprev);

fail:
	brelse(bp, 0);
	mutex_enter(&ump->um_lock);
	return (0);
}
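
/*
 * Example for ffs_fragextend() (assuming fs_frag = 8 and the rest of
 * the block free): growing a 2-fragment piece at the start of its
 * block to 4 fragments requires bits 2 and 3 of the block's blksfree
 * run to still be set (free).  The cg_frsum[] bookkeeping then moves
 * one run from the "6 free in a row" bucket to the "4 free in a row"
 * bucket, mirroring the run accounting done by ffs_alloccg().
 */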

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
static daddr_t
ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size, int flags)
{
	struct ufsmount *ump;
	struct fs *fs = ip->i_fs;
	struct cg *cgp;
	struct buf *bp;
	int32_t bno;
	daddr_t blkno;
	int error, frags, allocsiz, i;
	u_int8_t *blksfree;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	mutex_exit(&ump->um_lock);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
		goto fail;
	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, needswap);
	if (size == fs->fs_bsize) {
		mutex_enter(&ump->um_lock);
		blkno = ffs_alloccgblk(ip, bp, bpref, flags);
		ACTIVECG_CLR(fs, cg);
		mutex_exit(&ump->um_lock);
		bdwrite(bp);
		return (blkno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	blksfree = cg_blksfree(cgp, needswap);
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0)
			goto fail;
		mutex_enter(&ump->um_lock);
		blkno = ffs_alloccgblk(ip, bp, bpref, flags);
		bno = dtogd(fs, blkno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(blksfree, bno + i);
		i = fs->fs_frag - frags;
		ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		ufs_add32(cgp->cg_frsum[i], 1, needswap);
		ACTIVECG_CLR(fs, cg);
		mutex_exit(&ump->um_lock);
		bdwrite(bp);
		return (blkno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
#if 0
	/*
	 * XXX fvdl mapsearch will panic, and never return -1
	 *     also: returning NULL as daddr_t ?
	 */
	if (bno < 0)
		goto fail;
#endif
	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	mutex_enter(&ump->um_lock);
	ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
	if (frags != allocsiz)
		ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
	blkno = cgbase(fs, cg) + bno;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
	bdwrite(bp);
	return blkno;

fail:
	brelse(bp, 0);
	mutex_enter(&ump->um_lock);
	return (0);
}
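
/*
 * Example of the cg_frsum[] search in ffs_alloccg() (illustrative):
 * for a 3-fragment request with cg_frsum[3] == 0 but cg_frsum[5] > 0,
 * allocsiz settles on 5; after carving out the three fragments the
 * leftover pair is accounted by incrementing cg_frsum[2], which is
 * exactly the frsum[allocsiz - frags] adjustment above.
 */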

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 * 1) allocate the requested block.
 * 2) allocate a rotationally optimal block in the same cylinder.
 * 3) allocate the next available block on the block rotor for the
 *    specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
static daddr_t
ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref, int flags)
{
	struct ufsmount *ump;
	struct fs *fs = ip->i_fs;
	struct cg *cgp;
	int cg;
	daddr_t blkno;
	int32_t bno;
	u_int8_t *blksfree;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp, needswap);
	if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
		bpref = ufs_rw32(cgp->cg_rotor, needswap);
	} else {
		bpref = blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
			goto gotit;
		/*
		 * if the requested data block isn't available and we are
		 * trying to allocate a contiguous file, return an error.
		 */
		if ((flags & (B_CONTIG | B_METAONLY)) == B_CONTIG)
			return (0);
	}

	/*
	 * Take the next available block in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = ufs_rw32(bno, needswap);
gotit:
	blkno = fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
	if ((fs->fs_magic == FS_UFS1_MAGIC) &&
	    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
		int cylno;
		cylno = old_cbtocylno(fs, bno);
		KASSERT(cylno >= 0);
		KASSERT(cylno < fs->fs_old_ncyl);
		KASSERT(old_cbtorpos(fs, bno) >= 0);
		KASSERT(fs->fs_old_nrpos == 0 ||
		    old_cbtorpos(fs, bno) < fs->fs_old_nrpos);
		ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1,
		    needswap);
		ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap);
	}
	fs->fs_fmod = 1;
	cg = ufs_rw32(cgp->cg_cgx, needswap);
	blkno = cgbase(fs, cg) + bno;
	return (blkno);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 * 1) allocate the requested inode.
 * 2) allocate the next available inode after the requested
 *    inode in the specified cylinder group.
 */
static daddr_t
ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode, int flags)
{
	struct ufsmount *ump = ip->i_ump;
	struct fs *fs = ip->i_fs;
	struct cg *cgp;
	struct buf *bp, *ibp;
	u_int8_t *inosused;
	int error, start, len, loc, map, i;
	int32_t initediblk;
	daddr_t nalloc;
	struct ufs2_dinode *dp2;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	KASSERT(mutex_owned(&ump->um_lock));
	UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp);

	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);
	mutex_exit(&ump->um_lock);
	ibp = NULL;
	initediblk = -1;
retry:
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0)
		goto fail;

	if (ibp != NULL &&
	    initediblk != ufs_rw32(cgp->cg_initediblk, needswap)) {
		/* Another thread allocated more inodes so we retry the test. */
		brelse(ibp, 0);
		ibp = NULL;
	}
	/*
	 * Check to see if we need to initialize more inodes.
	 */
	if (fs->fs_magic == FS_UFS2_MAGIC && ibp == NULL) {
		initediblk = ufs_rw32(cgp->cg_initediblk, needswap);
		nalloc = fs->fs_ipg - ufs_rw32(cgp->cg_cs.cs_nifree, needswap);
		if (nalloc + INOPB(fs) > initediblk &&
		    initediblk < ufs_rw32(cgp->cg_niblk, needswap)) {
			/*
			 * We have to release the cg buffer here to prevent
			 * a deadlock when reading the inode block will
			 * run a copy-on-write that might use this cg.
			 */
			brelse(bp, 0);
			bp = NULL;
			error = ffs_getblk(ip->i_devvp, fsbtodb(fs,
			    ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)),
			    FFS_NOBLK, fs->fs_bsize, false, &ibp);
			if (error)
				goto fail;
			goto retry;
		}
	}

	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, needswap);
	inosused = cg_inosused(cgp, needswap);
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(inosused, ipref))
			goto gotit;
	}
	start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
	len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
	    NBBY);
	loc = skpc(0xff, len, &inosused[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &inosused[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, ufs_rw32(cgp->cg_irotor, needswap),
			    fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = inosused[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ufs_rw32(ipref, needswap);
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ffs_nodealloccg: block not in map");
	/* NOTREACHED */
gotit:
	UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, cg * fs->fs_ipg + ipref,
	    mode);
	/*
	 * Check to see if we need to initialize more inodes.
	 */
	if (ibp != NULL) {
		KASSERT(initediblk == ufs_rw32(cgp->cg_initediblk, needswap));
		memset(ibp->b_data, 0, fs->fs_bsize);
		dp2 = (struct ufs2_dinode *)(ibp->b_data);
		for (i = 0; i < INOPB(fs); i++) {
			/*
			 * Don't bother to swap, it's supposed to be
			 * random, after all.
			 */
			dp2->di_gen = (arc4random() & INT32_MAX) / 2 + 1;
			dp2++;
		}
		initediblk += INOPB(fs);
		cgp->cg_initediblk = ufs_rw32(initediblk, needswap);
	}

	mutex_enter(&ump->um_lock);
	ACTIVECG_CLR(fs, cg);
	setbit(inosused, ipref);
	ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	mutex_exit(&ump->um_lock);
	if (ibp != NULL) {
		bwrite(bp);
		bawrite(ibp);
	} else
		bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
fail:
	if (bp != NULL)
		brelse(bp, 0);
	if (ibp != NULL)
		brelse(ibp, 0);
	mutex_enter(&ump->um_lock);
	return (0);
}

/*
 * Allocate a block or fragment.
 *
 * The specified block or fragment is removed from the
 * free map, possibly fragmenting a block in the process.
 *
 * This implementation should mirror ffs_blkfree()
 *
 * => um_lock not held on entry or exit
 */
int
ffs_blkalloc(struct inode *ip, daddr_t bno, long size)
{
	int error;

	error = ffs_check_bad_allocation(__func__, ip->i_fs, bno, size,
	    ip->i_dev, ip->i_uid);
	if (error)
		return error;

	return ffs_blkalloc_ump(ip->i_ump, bno, size);
}
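
/*
 * ffs_blkalloc() / ffs_blkalloc_ump() are the force-allocate
 * counterparts of ffs_blkfree(): the caller names an exact block or
 * fragment range, and EBUSY comes back if any part of that range is
 * already in use, with the free maps left unmodified.
 */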

int
ffs_blkalloc_ump(struct ufsmount *ump, daddr_t bno, long size)
{
	struct fs *fs = ump->um_fs;
	struct cg *cgp;
	struct buf *bp;
	int32_t fragno, cgbno;
	int i, error, cg, blk, frags, bbase;
	u_int8_t *blksfree;
	const int needswap = UFS_FSNEEDSWAP(fs);

	KASSERT((u_int)size <= fs->fs_bsize && fragoff(fs, size) == 0 &&
	    fragnum(fs, bno) + numfrags(fs, size) <= fs->fs_frag);
	KASSERT(bno < fs->fs_size);

	cg = dtog(fs, bno);
	error = bread(ump->um_devvp, fsbtodb(fs, cgtod(fs, cg)),
	    (int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
	if (error) {
		brelse(bp, 0);
		return error;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return EIO;
	}
	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	cgp->cg_time = ufs_rw64(time_second, needswap);
	cgbno = dtogd(fs, bno);
	blksfree = cg_blksfree(cgp, needswap);

	mutex_enter(&ump->um_lock);
	if (size == fs->fs_bsize) {
		fragno = fragstoblks(fs, cgbno);
		if (!ffs_isblock(fs, blksfree, fragno)) {
			mutex_exit(&ump->um_lock);
			brelse(bp, 0);
			return EBUSY;
		}
		ffs_clrblock(fs, blksfree, fragno);
		ffs_clusteracct(fs, cgp, fragno, -1);
		ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
		fs->fs_cstotal.cs_nbfree--;
		fs->fs_cs(fs, cg).cs_nbfree--;
	} else {
		bbase = cgbno - fragnum(fs, cgbno);

		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isclr(blksfree, cgbno + i)) {
				mutex_exit(&ump->um_lock);
				brelse(bp, 0);
				return EBUSY;
			}
		}
		/*
		 * if a complete block is being split, account for it
		 */
		fragno = fragstoblks(fs, bbase);
		if (ffs_isblock(fs, blksfree, fragno)) {
			ufs_add32(cgp->cg_cs.cs_nffree, fs->fs_frag, needswap);
			fs->fs_cstotal.cs_nffree += fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree += fs->fs_frag;
			ffs_clusteracct(fs, cgp, fragno, -1);
			ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
			fs->fs_cstotal.cs_nbfree--;
			fs->fs_cs(fs, cg).cs_nbfree--;
		}
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
		/*
		 * allocate the fragment
		 */
		for (i = 0; i < frags; i++) {
			clrbit(blksfree, cgbno + i);
		}
		ufs_add32(cgp->cg_cs.cs_nffree, -i, needswap);
		fs->fs_cstotal.cs_nffree -= i;
		fs->fs_cs(fs, cg).cs_nffree -= i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
	bdwrite(bp);
	return 0;
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 *
 * => um_lock not held on entry or exit
 */
void
ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
    ino_t inum)
{
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	daddr_t cgblkno;
	int error, cg;
	dev_t dev;
	const bool devvp_is_snapshot = (devvp->v_type != VBLK);
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	KASSERT(!devvp_is_snapshot);

	cg = dtog(fs, bno);
	dev = devvp->v_rdev;
	ump = VFSTOUFS(devvp->v_specmountpoint);
	KASSERT(fs == ump->um_fs);
	cgblkno = fsbtodb(fs, cgtod(fs, cg));
	if (ffs_snapblkfree(fs, devvp, bno, size, inum))
		return;

	error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum);
	if (error)
		return;

	error = bread(devvp, cgblkno, (int)fs->fs_cgsize,
	    NOCRED, B_MODIFY, &bp);
	if (error) {
		brelse(bp, 0);
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return;
	}

	ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot);

	bdwrite(bp);
}
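
/*
 * The two free variants differ only in how the cylinder group copy
 * is addressed: ffs_blkfree() reads it from the raw device in device
 * blocks (fsbtodb), while ffs_blkfree_snap() below reads the
 * snapshot's own copy addressed in file-system blocks (fragstoblks).
 * Both feed the shared ffs_blkfree_common().
 */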

/*
 * Free a block or fragment from a snapshot cg copy.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 *
 * => um_lock not held on entry or exit
 */
void
ffs_blkfree_snap(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
    ino_t inum)
{
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	daddr_t cgblkno;
	int error, cg;
	dev_t dev;
	const bool devvp_is_snapshot = (devvp->v_type != VBLK);
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	KASSERT(devvp_is_snapshot);

	cg = dtog(fs, bno);
	dev = VTOI(devvp)->i_devvp->v_rdev;
	ump = VFSTOUFS(devvp->v_mount);
	cgblkno = fragstoblks(fs, cgtod(fs, cg));

	error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum);
	if (error)
		return;

	error = bread(devvp, cgblkno, (int)fs->fs_cgsize,
	    NOCRED, B_MODIFY, &bp);
	if (error) {
		brelse(bp, 0);
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return;
	}

	ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot);

	bdwrite(bp);
}

static void
ffs_blkfree_common(struct ufsmount *ump, struct fs *fs, dev_t dev,
    struct buf *bp, daddr_t bno, long size, bool devvp_is_snapshot)
{
	struct cg *cgp;
	int32_t fragno, cgbno;
	int i, cg, blk, frags, bbase;
	u_int8_t *blksfree;
	const int needswap = UFS_FSNEEDSWAP(fs);

	cg = dtog(fs, bno);
	cgp = (struct cg *)bp->b_data;
	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, needswap);
	cgbno = dtogd(fs, bno);
	blksfree = cg_blksfree(cgp, needswap);
	mutex_enter(&ump->um_lock);
	if (size == fs->fs_bsize) {
		fragno = fragstoblks(fs, cgbno);
		if (!ffs_isfreeblock(fs, blksfree, fragno)) {
			if (devvp_is_snapshot) {
				mutex_exit(&ump->um_lock);
				return;
			}
			printf("dev = 0x%llx, block = %" PRId64 ", fs = %s\n",
			    (unsigned long long)dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		ffs_setblock(fs, blksfree, fragno);
		ffs_clusteracct(fs, cgp, fragno, 1);
		ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		if ((fs->fs_magic == FS_UFS1_MAGIC) &&
		    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
			i = old_cbtocylno(fs, cgbno);
			KASSERT(i >= 0);
			KASSERT(i < fs->fs_old_ncyl);
			KASSERT(old_cbtorpos(fs, cgbno) >= 0);
			KASSERT(fs->fs_old_nrpos == 0 ||
			    old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos);
			ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1,
			    needswap);
			ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
		}
	} else {
		bbase = cgbno - fragnum(fs, cgbno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(blksfree, cgbno + i)) {
				printf("dev = 0x%llx, block = %" PRId64
				    ", fs = %s\n",
				    (unsigned long long)dev, bno + i,
				    fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(blksfree, cgbno + i);
		}
		ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		fragno = fragstoblks(fs, bbase);
		if (ffs_isblock(fs, blksfree, fragno)) {
			ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(fs, cgp, fragno, 1);
			ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			if ((fs->fs_magic == FS_UFS1_MAGIC) &&
			    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
				i = old_cbtocylno(fs, bbase);
				KASSERT(i >= 0);
				KASSERT(i < fs->fs_old_ncyl);
				KASSERT(old_cbtorpos(fs, bbase) >= 0);
				KASSERT(fs->fs_old_nrpos == 0 ||
				    old_cbtorpos(fs, bbase) < fs->fs_old_nrpos);
				ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs,
				    bbase)], 1, needswap);
				ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
			}
		}
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
}

/*
 * Free an inode.
 */
int
ffs_vfree(struct vnode *vp, ino_t ino, int mode)
{

	return ffs_freefile(vp->v_mount, ino, mode);
}

/*
 * Do the actual free operation.
 * The specified inode is placed back in the free map.
 *
 * => um_lock not held on entry or exit
 */
int
ffs_freefile(struct mount *mp, ino_t ino, int mode)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	struct vnode *devvp;
	struct cg *cgp;
	struct buf *bp;
	int error, cg;
	daddr_t cgbno;
	dev_t dev;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	cg = ino_to_cg(fs, ino);
	devvp = ump->um_devvp;
	dev = devvp->v_rdev;
	cgbno = fsbtodb(fs, cgtod(fs, cg));

	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
		    (long long)dev, (unsigned long long)ino, fs->fs_fsmnt);
	error = bread(devvp, cgbno, (int)fs->fs_cgsize,
	    NOCRED, B_MODIFY, &bp);
	if (error) {
		brelse(bp, 0);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return (0);
	}

	ffs_freefile_common(ump, fs, dev, bp, ino, mode, false);

	bdwrite(bp);

	return 0;
}
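
/*
 * ffs_freefile_snap() below mirrors ffs_freefile() the same way
 * ffs_blkfree_snap() mirrors ffs_blkfree(): the bookkeeping in
 * ffs_freefile_common() is identical, but the cylinder group is read
 * from the snapshot file rather than the raw device, and no WAPBL
 * inode un-registration is done for the snapshot copy.
 */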
int
ffs_freefile_snap(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
{
	struct ufsmount *ump;
	struct cg *cgp;
	struct buf *bp;
	int error, cg;
	daddr_t cgbno;
	dev_t dev;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	KASSERT(devvp->v_type != VBLK);

	cg = ino_to_cg(fs, ino);
	dev = VTOI(devvp)->i_devvp->v_rdev;
	ump = VFSTOUFS(devvp->v_mount);
	cgbno = fragstoblks(fs, cgtod(fs, cg));
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
		    (unsigned long long)dev, (unsigned long long)ino,
		    fs->fs_fsmnt);
	error = bread(devvp, cgbno, (int)fs->fs_cgsize,
	    NOCRED, B_MODIFY, &bp);
	if (error) {
		brelse(bp, 0);
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return (0);
	}
	ffs_freefile_common(ump, fs, dev, bp, ino, mode, true);

	bdwrite(bp);

	return 0;
}

static void
ffs_freefile_common(struct ufsmount *ump, struct fs *fs, dev_t dev,
    struct buf *bp, ino_t ino, int mode, bool devvp_is_snapshot)
{
	int cg;
	struct cg *cgp;
	u_int8_t *inosused;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	cg = ino_to_cg(fs, ino);
	cgp = (struct cg *)bp->b_data;
	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, needswap);
	inosused = cg_inosused(cgp, needswap);
	ino %= fs->fs_ipg;
	if (isclr(inosused, ino)) {
		printf("ifree: dev = 0x%llx, ino = %llu, fs = %s\n",
		    (unsigned long long)dev, (unsigned long long)ino +
		    cg * fs->fs_ipg, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ifree: freeing free inode");
	}
	clrbit(inosused, ino);
	if (!devvp_is_snapshot)
		UFS_WAPBL_UNREGISTER_INODE(ump->um_mountp,
		    ino + cg * fs->fs_ipg, mode);
	if (ino < ufs_rw32(cgp->cg_irotor, needswap))
		cgp->cg_irotor = ufs_rw32(ino, needswap);
	ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
	mutex_enter(&ump->um_lock);
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
}

/*
 * Check to see if a file is free.
 * Returns nonzero if the inode is free; read errors and out-of-range
 * inode numbers are also reported as free.
 */
int
ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
{
	struct cg *cgp;
	struct buf *bp;
	daddr_t cgbno;
	int ret, cg;
	u_int8_t *inosused;
	const bool devvp_is_snapshot = (devvp->v_type != VBLK);

	KASSERT(devvp_is_snapshot);

	cg = ino_to_cg(fs, ino);
	if (devvp_is_snapshot)
		cgbno = fragstoblks(fs, cgtod(fs, cg));
	else
		cgbno = fsbtodb(fs, cgtod(fs, cg));
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		return 1;
	if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, 0, &bp)) {
		brelse(bp, 0);
		return 1;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
		brelse(bp, 0);
		return 1;
	}
	inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs));
	ino %= fs->fs_ipg;
	ret = isclr(inosused, ino);
	brelse(bp, 0);
	return ret;
}
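
/*
 * The search below works a byte at a time: scanc() uses the
 * precomputed fragtbl[] from ffs_tables.c to classify each byte of the
 * free map by the free-fragment runs it contains, skipping any byte
 * that cannot satisfy a run of allocsiz fragments; the
 * around[]/inside[] masks then locate the exact run within the
 * matching block.  The scan starts at bpref, or at the per-group
 * rotor (cg_frotor), and wraps to the start of the map if the first
 * pass fails.
 */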
/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
static int32_t
ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
{
	int32_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;
	int ostart, olen;
	u_int8_t *blksfree;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	/* KASSERT(mutex_owned(&ump->um_lock)); */

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
	blksfree = cg_blksfree(cgp, needswap);
	len = howmany(fs->fs_fpg, NBBY) - start;
	ostart = start;
	olen = len;
	loc = scanc((u_int)len,
	    (const u_char *)&blksfree[start],
	    (const u_char *)fragtbl[fs->fs_frag],
	    (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len,
		    (const u_char *)&blksfree[0],
		    (const u_char *)fragtbl[fs->fs_frag],
		    (1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    ostart, olen, fs->fs_fsmnt);
			printf("offset=%d %ld\n",
			    ufs_rw32(cgp->cg_freeoff, needswap),
			    (long)blksfree - (long)cgp);
			printf("cg %d\n", cgp->cg_cgx);
			panic("ffs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = ufs_rw32(bno, needswap);
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, blksfree, bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("ffs_alloccg: block not in map");
	/* return (-1); */
}
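
/*
 * Worked example for the accounting below (illustrative values,
 * assuming fs_contigsumsize is at least 4): freeing one block
 * (cnt == 1) that sits between a free run of 1 block behind it and a
 * free run of 2 blocks ahead of it joins the three runs into a single
 * 4-block cluster, so sump[4] is incremented while the old sump[1] and
 * sump[2] entries are decremented.
 */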
/*
 * Update the cluster map because of an allocation or free.
 *
 * Cnt == 1 means free; cnt == -1 means allocating.
 */
void
ffs_clusteracct(struct fs *fs, struct cg *cgp, int32_t blkno, int cnt)
{
	int32_t *sump;
	int32_t *lp;
	u_char *freemapp, *mapp;
	int i, start, end, forw, back, map, bit;
#ifdef FFS_EI
	const int needswap = UFS_FSNEEDSWAP(fs);
#endif

	/* KASSERT(mutex_owned(&ump->um_lock)); */

	if (fs->fs_contigsumsize <= 0)
		return;
	freemapp = cg_clustersfree(cgp, needswap);
	sump = cg_clustersum(cgp, needswap);
	/*
	 * Allocate or clear the actual block.
	 */
	if (cnt > 0)
		setbit(freemapp, blkno);
	else
		clrbit(freemapp, blkno);
	/*
	 * Find the size of the cluster going forward.
	 */
	start = blkno + 1;
	end = start + fs->fs_contigsumsize;
	if (end >= ufs_rw32(cgp->cg_nclusterblks, needswap))
		end = ufs_rw32(cgp->cg_nclusterblks, needswap);
	mapp = &freemapp[start / NBBY];
	map = *mapp++;
	bit = 1 << (start % NBBY);
	for (i = start; i < end; i++) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != (NBBY - 1)) {
			bit <<= 1;
		} else {
			map = *mapp++;
			bit = 1;
		}
	}
	forw = i - start;
	/*
	 * Find the size of the cluster going backward.
	 */
	start = blkno - 1;
	end = start - fs->fs_contigsumsize;
	if (end < 0)
		end = -1;
	mapp = &freemapp[start / NBBY];
	map = *mapp--;
	bit = 1 << (start % NBBY);
	for (i = start; i > end; i--) {
		if ((map & bit) == 0)
			break;
		if ((i & (NBBY - 1)) != 0) {
			bit >>= 1;
		} else {
			map = *mapp--;
			bit = 1 << (NBBY - 1);
		}
	}
	back = start - i;
	/*
	 * Account for old cluster and the possibly new forward and
	 * back clusters.
	 */
	i = back + forw + 1;
	if (i > fs->fs_contigsumsize)
		i = fs->fs_contigsumsize;
	ufs_add32(sump[i], cnt, needswap);
	if (back > 0)
		ufs_add32(sump[back], -cnt, needswap);
	if (forw > 0)
		ufs_add32(sump[forw], -cnt, needswap);

	/*
	 * Update cluster summary information.
	 */
	lp = &sump[fs->fs_contigsumsize];
	for (i = fs->fs_contigsumsize; i > 0; i--)
		if (ufs_rw32(*lp--, needswap) > 0)
			break;
	fs->fs_maxcluster[ufs_rw32(cgp->cg_cgx, needswap)] = i;
}

/*
 * Fserr prints the name of a file system with an error diagnostic,
 * preceded by the uid, pid and command name of the offending process.
 *
 * The form of the error message is:
 *	fs: error message
 */
static void
ffs_fserr(struct fs *fs, u_int uid, const char *cp)
{

	log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n",
	    uid, curproc->p_pid, curproc->p_comm, fs->fs_fsmnt, cp);
}
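
/*
 * With illustrative values, a caller passing cp = "bad block" to
 * ffs_fserr() above would produce a log line of the form
 *
 *	uid 100, pid 1234, command tar, on /usr: bad block
 */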