/*	$NetBSD: lfs_bio.c,v 1.135 2015/10/03 09:31:29 hannken Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_bio.c	8.10 (Berkeley) 6/10/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.135 2015/10/03 09:31:29 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/kauth.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/ulfsmount.h>
#include <ufs/lfs/ulfs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_extern.h>
#include <ufs/lfs/lfs_kernel.h>

#include <uvm/uvm.h>

/*
 * LFS block write function.
 *
 * XXX
 * No write cost accounting is done.
 * This is almost certainly wrong for synchronous operations and NFS.
 *
 * protected by lfs_lock.
 */
int	locked_queue_count = 0;		/* Count of locked-down buffers. */
long	locked_queue_bytes = 0L;	/* Total size of locked buffers. */
int	lfs_subsys_pages = 0L;		/* Total number of LFS-written pages */
int	lfs_fs_pagetrip = 0;		/* # of pages to trip per-fs write */
int	lfs_writing = 0;		/* Set if already kicked off a writer
					   because of buffer space */
int	locked_queue_waiters = 0;	/* Number of processes waiting on lq */

/* Lock and condition variables for above. */
kcondvar_t	locked_queue_cv;
kcondvar_t	lfs_writing_cv;
kmutex_t	lfs_lock;

extern int lfs_dostats;

/*
 * reserved number/bytes of locked buffers
 */
int locked_queue_rcount = 0;
long locked_queue_rbytes = 0L;

static int lfs_fits_buf(struct lfs *, int, int);
static int lfs_reservebuf(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int, int);
static int lfs_reserveavail(struct lfs *, struct vnode *vp, struct vnode *vp2,
    int);

static int
lfs_fits_buf(struct lfs *fs, int n, int bytes)
{
	int count_fit, bytes_fit;

	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));

	count_fit =
	    (locked_queue_count + locked_queue_rcount + n <= LFS_WAIT_BUFS);
	bytes_fit =
	    (locked_queue_bytes + locked_queue_rbytes + bytes <= LFS_WAIT_BYTES);

#ifdef DEBUG
	if (!count_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit count: %d + %d + %d >= %d\n",
		      locked_queue_count, locked_queue_rcount,
		      n, LFS_WAIT_BUFS));
	}
	if (!bytes_fit) {
		DLOG((DLOG_AVAIL, "lfs_fits_buf: no fit bytes: %ld + %ld + %d >= %ld\n",
		      locked_queue_bytes, locked_queue_rbytes,
		      bytes, LFS_WAIT_BYTES));
	}
#endif /* DEBUG */

	return (count_fit && bytes_fit);
}

/* ARGSUSED */
static int
lfs_reservebuf(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int n, int bytes)
{
	int cantwait;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
	mutex_enter(&lfs_lock);
	while (!cantwait && n > 0 && !lfs_fits_buf(fs, n, bytes)) {
		int error;

		lfs_flush(fs, 0, 0);

		DLOG((DLOG_AVAIL, "lfs_reservebuf: waiting: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		++locked_queue_waiters;
		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		--locked_queue_waiters;
		if (error && error != EWOULDBLOCK) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}

	locked_queue_rcount += n;
	locked_queue_rbytes += bytes;

	if (n < 0 && locked_queue_waiters > 0) {
		DLOG((DLOG_AVAIL, "lfs_reservebuf: broadcast: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		cv_broadcast(&locked_queue_cv);
	}

	mutex_exit(&lfs_lock);

	KASSERT(locked_queue_rcount >= 0);
	KASSERT(locked_queue_rbytes >= 0);

	return 0;
}
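
/*
 * Editorial note: lfs_reservebuf() is the buffer-count half of the
 * reservation scheme; lfs_reserveavail() below is the disk-space half.
 * A call with a negative n/bytes pair releases a previous reservation,
 * and the cv_broadcast() above then unthrottles anyone sleeping on the
 * locked queue.
 */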

/*
 * Try to reserve some blocks, prior to performing a sensitive operation that
 * requires the vnode lock to be honored.  If there is not enough space, wait
 * for the space to become available.
 *
 * Called with vp locked.  (Note however that if fsb < 0, vp is ignored.)
 */
static int
lfs_reserveavail(struct lfs *fs, struct vnode *vp,
    struct vnode *vp2, int fsb)
{
	CLEANERINFO *cip;
	struct buf *bp;
	int error, slept;
	int cantwait;

	ASSERT_MAYBE_SEGLOCK(fs);
	slept = 0;
	mutex_enter(&lfs_lock);
	cantwait = (VTOI(vp)->i_flag & IN_ADIROP) || fs->lfs_unlockvp == vp;
	while (!cantwait && fsb > 0 &&
	    !lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail)) {
		mutex_exit(&lfs_lock);

		if (!slept) {
			DLOG((DLOG_AVAIL, "lfs_reserve: waiting for %ld (bfree = %jd,"
			      " est_bfree = %jd)\n",
			      fsb + fs->lfs_ravail + fs->lfs_favail,
			      (intmax_t)lfs_sb_getbfree(fs),
			      (intmax_t)LFS_EST_BFREE(fs)));
		}
		++slept;

		/* Wake up the cleaner */
		LFS_CLEANERINFO(cip, fs, bp);
		LFS_SYNC_CLEANERINFO(cip, fs, bp, 0);
		lfs_wakeup_cleaner(fs);

		mutex_enter(&lfs_lock);
		/* Cleaner might have run while we were reading; check again */
		if (lfs_fits(fs, fsb + fs->lfs_ravail + fs->lfs_favail))
			break;

		error = mtsleep(&fs->lfs_availsleep, PCATCH | PUSER,
		    "lfs_reserve", 0, &lfs_lock);
		if (error) {
			mutex_exit(&lfs_lock);
			return error;
		}
	}
#ifdef DEBUG
	if (slept) {
		DLOG((DLOG_AVAIL, "lfs_reserve: woke up\n"));
	}
#endif
	fs->lfs_ravail += fsb;
	mutex_exit(&lfs_lock);

	return 0;
}

#ifdef DIAGNOSTIC
int lfs_rescount;
int lfs_rescountdirop;
#endif

int
lfs_reserve(struct lfs *fs, struct vnode *vp, struct vnode *vp2, int fsb)
{
	int error;

	ASSERT_MAYBE_SEGLOCK(fs);
	if (vp2) {
		/* Make sure we're not in the process of reclaiming vp2 */
		mutex_enter(&lfs_lock);
		while (fs->lfs_flags & LFS_UNDIROP) {
			mtsleep(&fs->lfs_flags, PRIBIO + 1, "lfsrundirop", 0,
			    &lfs_lock);
		}
		mutex_exit(&lfs_lock);
	}

	KASSERT(fsb < 0 || VOP_ISLOCKED(vp));
	KASSERT(vp2 == NULL || fsb < 0 || VOP_ISLOCKED(vp2));
	KASSERT(vp2 == NULL || vp2 != fs->lfs_unlockvp);

#ifdef DIAGNOSTIC
	mutex_enter(&lfs_lock);
	if (fsb > 0)
		lfs_rescount++;
	else if (fsb < 0)
		lfs_rescount--;
	if (lfs_rescount < 0)
		panic("lfs_rescount");
	mutex_exit(&lfs_lock);
#endif

	error = lfs_reserveavail(fs, vp, vp2, fsb);
	if (error)
		return error;

	/*
	 * XXX just a guess.  should be more precise.
	 */
	error = lfs_reservebuf(fs, vp, vp2, fsb, lfs_fsbtob(fs, fsb));
	if (error)
		lfs_reserveavail(fs, vp, vp2, -fsb);

	return error;
}
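
/*
 * Usage sketch (illustrative, not a call site from this file): callers
 * are expected to bracket an operation that may dirty blocks with a
 * matching positive/negative pair, e.g.
 *
 *	if ((error = lfs_reserve(fs, vp, NULL, nblks)) != 0)
 *		return error;
 *	... allocate and dirty up to nblks worth of blocks ...
 *	lfs_reserve(fs, vp, NULL, -nblks);
 *
 * where nblks is a hypothetical count in frags; the negative call
 * releases both halves of the reservation taken above.
 */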

int
lfs_bwrite(void *v)
{
	struct vop_bwrite_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp = ap->a_bp;

#ifdef DIAGNOSTIC
	if (VTOI(bp->b_vp)->i_lfs->lfs_ronly == 0 && (bp->b_flags & B_ASYNC)) {
		panic("bawrite LFS buffer");
	}
#endif /* DIAGNOSTIC */
	return lfs_bwrite_ext(bp, 0);
}

/*
 * Determine if there is enough room currently available to write fsb
 * blocks.  We need enough blocks for the new blocks, the current
 * inode blocks (including potentially the ifile inode), a summary block,
 * and the segment usage table, plus an ifile block.
 */
int
lfs_fits(struct lfs *fs, int fsb)
{
	int64_t needed;

	ASSERT_NO_SEGLOCK(fs);
	needed = fsb + lfs_btofsb(fs, lfs_sb_getsumsize(fs)) +
	    ((howmany(lfs_sb_getuinodes(fs) + 1, LFS_INOPB(fs)) +
	      lfs_sb_getsegtabsz(fs) +
	      1) << (lfs_sb_getbshift(fs) - lfs_sb_getffshift(fs)));

	if (needed >= lfs_sb_getavail(fs)) {
#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_fits: no fit: fsb = %ld, uinodes = %ld, "
		      "needed = %jd, avail = %jd\n",
		      (long)fsb, (long)lfs_sb_getuinodes(fs), (intmax_t)needed,
		      (intmax_t)lfs_sb_getavail(fs)));
#endif
		return 0;
	}
	return 1;
}
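
/*
 * Worked example (illustrative superblock values, not from any real
 * filesystem): with 8 KB blocks and 1 KB frags (bshift = 13,
 * ffshift = 10), one block is 1 << 3 = 8 frags.  If the summary block
 * is one 8 KB block (8 frags), 63 dirty inodes fit in one inode block
 * (howmany(63 + 1, 64) == 1), and the segment table is 2 blocks, then
 *
 *	needed = fsb + 8 + ((1 + 2 + 1) << 3) = fsb + 40 frags,
 *
 * and the write fits only while that total stays strictly below
 * lfs_sb_getavail(fs).
 */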

int
lfs_availwait(struct lfs *fs, int fsb)
{
	int error;
	CLEANERINFO *cip;
	struct buf *cbp;

	ASSERT_NO_SEGLOCK(fs);
	/* Push cleaner blocks through regardless */
	mutex_enter(&lfs_lock);
	if (LFS_SEGLOCK_HELD(fs) &&
	    fs->lfs_sp->seg_flags & (SEGM_CLEAN | SEGM_FORCE_CKP)) {
		mutex_exit(&lfs_lock);
		return 0;
	}
	mutex_exit(&lfs_lock);

	while (!lfs_fits(fs, fsb)) {
		/*
		 * Out of space, need cleaner to run.
		 * Update the cleaner info, then wake it up.
		 * Note the cleanerinfo block is on the ifile
		 * so it CANT_WAIT.
		 */
		LFS_CLEANERINFO(cip, fs, cbp);
		LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0);

#ifdef DEBUG
		DLOG((DLOG_AVAIL, "lfs_availwait: out of available space, "
		      "waiting on cleaner\n"));
#endif

		lfs_wakeup_cleaner(fs);
#ifdef DIAGNOSTIC
		if (LFS_SEGLOCK_HELD(fs))
			panic("lfs_availwait: deadlock");
#endif
		error = tsleep(&fs->lfs_availsleep, PCATCH | PUSER,
		    "cleaner", 0);
		if (error)
			return (error);
	}
	return 0;
}

int
lfs_bwrite_ext(struct buf *bp, int flags)
{
	struct lfs *fs;
	struct inode *ip;
	struct vnode *vp;
	int fsb;

	vp = bp->b_vp;
	fs = VFSTOULFS(vp->v_mount)->um_lfs;

	ASSERT_MAYBE_SEGLOCK(fs);
	KASSERT(bp->b_cflags & BC_BUSY);
	KASSERT(flags & BW_CLEAN || !LFS_IS_MALLOC_BUF(bp));
	KASSERT(((bp->b_oflags | bp->b_flags) & (BO_DELWRI|B_LOCKED))
	    != BO_DELWRI);

	/*
	 * Don't write *any* blocks if we're mounted read-only, or
	 * if we are "already unmounted".
	 *
	 * In particular the cleaner can't write blocks either.
	 */
	if (fs->lfs_ronly || (lfs_sb_getpflags(fs) & LFS_PF_CLEAN)) {
		bp->b_oflags &= ~BO_DELWRI;
		bp->b_flags |= B_READ; /* XXX is this right? --ks */
		bp->b_error = 0;
		mutex_enter(&bufcache_lock);
		LFS_UNLOCK_BUF(bp);
		if (LFS_IS_MALLOC_BUF(bp))
			bp->b_cflags &= ~BC_BUSY;
		else
			brelsel(bp, 0);
		mutex_exit(&bufcache_lock);
		return (fs->lfs_ronly ? EROFS : 0);
	}

	/*
	 * Set the delayed write flag and use reassignbuf to move the buffer
	 * from the clean list to the dirty one.
	 *
	 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move
	 * the buffer onto the LOCKED free list.  This is necessary, otherwise
	 * getnewbuf() would try to reclaim the buffers using bawrite, which
	 * isn't going to work.
	 *
	 * XXX we don't let meta-data writes run out of space because they can
	 * come from the segment writer.  We need to make sure that there is
	 * enough space reserved so that there's room to write meta-data
	 * blocks.
	 */
	if ((bp->b_flags & B_LOCKED) == 0) {
		fsb = lfs_numfrags(fs, bp->b_bcount);

		ip = VTOI(vp);
		mutex_enter(&lfs_lock);
		if (flags & BW_CLEAN) {
			LFS_SET_UINO(ip, IN_CLEANING);
		} else {
			LFS_SET_UINO(ip, IN_MODIFIED);
		}
		mutex_exit(&lfs_lock);
		lfs_sb_subavail(fs, fsb);

		mutex_enter(&bufcache_lock);
		mutex_enter(vp->v_interlock);
		bp->b_oflags = (bp->b_oflags | BO_DELWRI) & ~BO_DONE;
		LFS_LOCK_BUF(bp);
		bp->b_flags &= ~B_READ;
		bp->b_error = 0;
		reassignbuf(bp, bp->b_vp);
		mutex_exit(vp->v_interlock);
	} else {
		mutex_enter(&bufcache_lock);
	}

	if (bp->b_iodone != NULL)
		bp->b_cflags &= ~BC_BUSY;
	else
		brelsel(bp, 0);
	mutex_exit(&bufcache_lock);

	return (0);
}

/*
 * Called and returns with the lfs_lock held.
 */
void
lfs_flush_fs(struct lfs *fs, int flags)
{
	ASSERT_NO_SEGLOCK(fs);
	KASSERT(mutex_owned(&lfs_lock));
	if (fs->lfs_ronly)
		return;

	if (lfs_dostats)
		++lfs_stats.flush_invoked;

	fs->lfs_pdflush = 0;
	mutex_exit(&lfs_lock);
	lfs_writer_enter(fs, "fldirop");
	lfs_segwrite(fs->lfs_ivnode->v_mount, flags);
	lfs_writer_leave(fs);
	mutex_enter(&lfs_lock);
	fs->lfs_favail = 0; /* XXX */
}
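
/*
 * Calling convention sketch for lfs_flush_fs() (it mirrors the calls
 * made from lfs_flush() below):
 *
 *	mutex_enter(&lfs_lock);
 *	lfs_flush_fs(fs, flags);
 *	mutex_exit(&lfs_lock);
 *
 * The routine drops and retakes lfs_lock internally around the actual
 * segment write.
 */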

/*
 * This routine initiates segment writes when LFS is consuming too many
 * resources.  Ideally the pageout daemon would be able to direct LFS
 * more subtly.
 * XXX We have one static count of locked buffers;
 * XXX need to think more about the multiple filesystem case.
 *
 * Called and returns with lfs_lock held.
 * If fs != NULL, we hold the segment lock for fs.
 */
void
lfs_flush(struct lfs *fs, int flags, int only_onefs)
{
	extern u_int64_t locked_fakequeue_count;
	struct mount *mp, *nmp;
	struct lfs *tfs;

	KASSERT(mutex_owned(&lfs_lock));
	KDASSERT(fs == NULL || !LFS_SEGLOCK_HELD(fs));

	if (lfs_dostats)
		++lfs_stats.write_exceeded;
	/* XXX should we include SEGM_CKP here? */
	if (lfs_writing && !(flags & SEGM_SYNC)) {
		DLOG((DLOG_FLUSH, "lfs_flush: not flushing because another flush is active\n"));
		return;
	}
	while (lfs_writing)
		cv_wait(&lfs_writing_cv, &lfs_lock);
	lfs_writing = 1;

	mutex_exit(&lfs_lock);

	if (only_onefs) {
		KASSERT(fs != NULL);
		if (vfs_busy(fs->lfs_ivnode->v_mount, NULL))
			goto errout;
		mutex_enter(&lfs_lock);
		lfs_flush_fs(fs, flags);
		mutex_exit(&lfs_lock);
		vfs_unbusy(fs->lfs_ivnode->v_mount, false, NULL);
	} else {
		locked_fakequeue_count = 0;
		mutex_enter(&mountlist_lock);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			if (vfs_busy(mp, &nmp)) {
				DLOG((DLOG_FLUSH, "lfs_flush: fs vfs_busy\n"));
				continue;
			}
			if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS,
			    sizeof(mp->mnt_stat.f_fstypename)) == 0) {
				tfs = VFSTOULFS(mp)->um_lfs;
				mutex_enter(&lfs_lock);
				lfs_flush_fs(tfs, flags);
				mutex_exit(&lfs_lock);
			}
			vfs_unbusy(mp, false, &nmp);
		}
		mutex_exit(&mountlist_lock);
	}
	LFS_DEBUG_COUNTLOCKED("flush");
	wakeup(&lfs_subsys_pages);

errout:
	mutex_enter(&lfs_lock);
	KASSERT(lfs_writing);
	lfs_writing = 0;
	wakeup(&lfs_writing);
}

#define INOCOUNT(fs)	howmany(lfs_sb_getuinodes(fs), LFS_INOPB(fs))
#define INOBYTES(fs)	(lfs_sb_getuinodes(fs) * DINOSIZE(fs))
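
/*
 * Editorial note: INOCOUNT and INOBYTES estimate the blocks and bytes
 * that this filesystem's dirty inodes will occupy once written;
 * lfs_check() below adds them to the locked-queue totals before
 * comparing against the LFS_MAX_ and LFS_WAIT_ thresholds.
 */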

/*
 * Make sure that we don't have too many locked buffers.
 * Flush buffers if needed.
 */
int
lfs_check(struct vnode *vp, daddr_t blkno, int flags)
{
	int error;
	struct lfs *fs;
	struct inode *ip;
	extern pid_t lfs_writer_daemon;

	error = 0;
	ip = VTOI(vp);

	/* If out of buffers, wait on writer */
	/* XXX KS - if it's the Ifile, we're probably the cleaner! */
	if (ip->i_number == LFS_IFILE_INUM)
		return 0;
	/* If we're being called from inside a dirop, don't sleep */
	if (ip->i_flag & IN_ADIROP)
		return 0;

	fs = ip->i_lfs;

	ASSERT_NO_SEGLOCK(fs);

	/*
	 * If we would flush below, but dirops are active, sleep.
	 * Note that a dirop cannot ever reach this code!
	 */
	mutex_enter(&lfs_lock);
	while (fs->lfs_dirops > 0 &&
	    (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
	     locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
	     lfs_subsys_pages > LFS_MAX_PAGES ||
	     fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	     lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0))
	{
		++fs->lfs_diropwait;
		mtsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0,
		    &lfs_lock);
		--fs->lfs_diropwait;
	}

#ifdef DEBUG
	if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS)
		DLOG((DLOG_FLUSH, "lfs_check: lqc = %d, max %d\n",
		      locked_queue_count + INOCOUNT(fs), LFS_MAX_BUFS));
	if (locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES)
		DLOG((DLOG_FLUSH, "lfs_check: lqb = %ld, max %ld\n",
		      locked_queue_bytes + INOBYTES(fs), LFS_MAX_BYTES));
	if (lfs_subsys_pages > LFS_MAX_PAGES)
		DLOG((DLOG_FLUSH, "lfs_check: lssp = %d, max %d\n",
		      lfs_subsys_pages, LFS_MAX_PAGES));
	if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip)
		DLOG((DLOG_FLUSH, "lfs_check: fssp = %d, trip at %d\n",
		      fs->lfs_pages, lfs_fs_pagetrip));
	if (lfs_dirvcount > LFS_MAX_DIROP)
		DLOG((DLOG_FLUSH, "lfs_check: ldvc = %d, max %d\n",
		      lfs_dirvcount, LFS_MAX_DIROP));
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs))
		DLOG((DLOG_FLUSH, "lfs_check: lfdvc = %d, max %d\n",
		      fs->lfs_dirvcount, LFS_MAX_FSDIROP(fs)));
	if (fs->lfs_diropwait > 0)
		DLOG((DLOG_FLUSH, "lfs_check: ldvw = %d\n",
		      fs->lfs_diropwait));
#endif

	/* If there are too many pending dirops, we have to flush them. */
	if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	    lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		mutex_exit(&lfs_lock);
		lfs_flush_dirops(fs);
		mutex_enter(&lfs_lock);
	} else if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS ||
	    locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES ||
	    lfs_subsys_pages > LFS_MAX_PAGES ||
	    fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	    lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) {
		lfs_flush(fs, flags, 0);
	} else if (lfs_fs_pagetrip && fs->lfs_pages > lfs_fs_pagetrip) {
		/*
		 * If we didn't flush the whole thing, some filesystems
		 * still might want to be flushed.
		 */
		++fs->lfs_pdflush;
		wakeup(&lfs_writer_daemon);
	}

	while (locked_queue_count + INOCOUNT(fs) >= LFS_WAIT_BUFS ||
	    locked_queue_bytes + INOBYTES(fs) >= LFS_WAIT_BYTES ||
	    lfs_subsys_pages > LFS_WAIT_PAGES ||
	    fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) ||
	    lfs_dirvcount > LFS_MAX_DIROP) {

		if (lfs_dostats)
			++lfs_stats.wait_exceeded;
		DLOG((DLOG_AVAIL, "lfs_check: waiting: count=%d, bytes=%ld\n",
		      locked_queue_count, locked_queue_bytes));
		++locked_queue_waiters;
		error = cv_timedwait_sig(&locked_queue_cv, &lfs_lock,
		    hz * LFS_BUFWAIT);
		--locked_queue_waiters;
		if (error != EWOULDBLOCK)
			break;

		/*
		 * lfs_flush might not flush all the buffers, if some of the
		 * inodes were locked or if most of them were Ifile blocks
		 * and we weren't asked to checkpoint.  Try flushing again
		 * to keep us from blocking indefinitely.
		 */
		if (locked_queue_count + INOCOUNT(fs) >= LFS_MAX_BUFS ||
		    locked_queue_bytes + INOBYTES(fs) >= LFS_MAX_BYTES) {
			lfs_flush(fs, flags | SEGM_CKP, 0);
		}
	}
	mutex_exit(&lfs_lock);
	return (error);
}
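
/*
 * Usage sketch (illustrative; the real call sites are elsewhere in the
 * LFS code): a write path is expected to throttle itself before
 * dirtying more buffers, along the lines of
 *
 *	if ((error = lfs_check(vp, lbn, 0)) != 0)
 *		return error;
 *
 * where lbn is the logical block about to be written.
 */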

/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(struct lfs *fs, struct vnode *vp, daddr_t daddr, size_t size, int type)
{
	struct buf *bp;
	size_t nbytes;

	ASSERT_MAYBE_SEGLOCK(fs);
	nbytes = roundup(size, lfs_fsbtob(fs, 1));

	bp = getiobuf(NULL, true);
	if (nbytes) {
		bp->b_data = lfs_malloc(fs, nbytes, type);
		/* memset(bp->b_data, 0, nbytes); */
	}
#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vp is NULL in lfs_newbuf");
	if (bp == NULL)
		panic("bp is NULL after malloc in lfs_newbuf");
#endif

	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_iodone = lfs_callback;
	bp->b_cflags = BC_BUSY | BC_NOCACHE;
	bp->b_private = fs;

	mutex_enter(&bufcache_lock);
	mutex_enter(vp->v_interlock);
	bgetvp(vp, bp);
	mutex_exit(vp->v_interlock);
	mutex_exit(&bufcache_lock);

	return (bp);
}

void
lfs_freebuf(struct lfs *fs, struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		mutex_enter(&bufcache_lock);
		mutex_enter(vp->v_interlock);
		brelvp(bp);
		mutex_exit(vp->v_interlock);
		mutex_exit(&bufcache_lock);
	}
	if (!(bp->b_cflags & BC_INVAL)) { /* BC_INVAL indicates a "fake" buffer */
		lfs_free(fs, bp->b_data, LFS_NB_UNKNOWN);
		bp->b_data = NULL;
	}
	putiobuf(bp);
}

/*
 * Count buffers on the "locked" queue, and compare it to a pro-forma count.
 * Don't count malloced buffers, since they don't detract from the total.
 */
void
lfs_countlocked(int *count, long *bytes, const char *msg)
{
	struct buf *bp;
	int n = 0;
	long int size = 0L;

	mutex_enter(&bufcache_lock);
	TAILQ_FOREACH(bp, &bufqueues[BQ_LOCKED].bq_queue, b_freelist) {
		KASSERT(bp->b_iodone == NULL);
		n++;
		size += bp->b_bufsize;
#ifdef DIAGNOSTIC
		if (n > nbuf)
			panic("lfs_countlocked: this can't happen: more"
			      " buffers locked than exist");
#endif
	}
	/*
	 * Theoretically this function never really does anything.
	 * Give a warning if we have to fix the accounting.
	 */
	if (n != *count) {
		DLOG((DLOG_LLIST, "lfs_countlocked: %s: adjusted buf count"
		      " from %d to %d\n", msg, *count, n));
	}
	if (size != *bytes) {
		DLOG((DLOG_LLIST, "lfs_countlocked: %s: adjusted byte count"
		      " from %ld to %ld\n", msg, *bytes, size));
	}
	*count = n;
	*bytes = size;
	mutex_exit(&bufcache_lock);
	return;
}

int
lfs_wait_pages(void)
{
	int active, inactive;

	uvm_estimatepageable(&active, &inactive);
	return LFS_WAIT_RESOURCE(active + inactive + uvmexp.free, 1);
}

int
lfs_max_pages(void)
{
	int active, inactive;

	uvm_estimatepageable(&active, &inactive);
	return LFS_MAX_RESOURCE(active + inactive + uvmexp.free, 1);
}
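
/*
 * Editorial note: lfs_wait_pages() and lfs_max_pages() size the paging
 * thresholds from the amount of pageable memory currently known to UVM;
 * presumably the LFS_WAIT_PAGES and LFS_MAX_PAGES limits tested in
 * lfs_check() above are derived from these totals.
 */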