/* $NetBSD: lfs_bio.c,v 1.43 2002/05/14 20:03:53 perseant Exp $ */

/*-
 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the NetBSD
 *      Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 62 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 63 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 64 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 65 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 66 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 67 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 68 * SUCH DAMAGE. 69 * 70 * @(#)lfs_bio.c 8.10 (Berkeley) 6/10/95 71 */ 72 73 #include <sys/cdefs.h> 74 __KERNEL_RCSID(0, "$NetBSD: lfs_bio.c,v 1.43 2002/05/14 20:03:53 perseant Exp $"); 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/proc.h> 79 #include <sys/buf.h> 80 #include <sys/vnode.h> 81 #include <sys/resourcevar.h> 82 #include <sys/mount.h> 83 #include <sys/kernel.h> 84 85 #include <ufs/ufs/inode.h> 86 #include <ufs/ufs/ufsmount.h> 87 #include <ufs/ufs/ufs_extern.h> 88 89 #include <sys/malloc.h> 90 #include <ufs/lfs/lfs.h> 91 #include <ufs/lfs/lfs_extern.h> 92 93 /* Macros to clear/set/test flags. */ 94 # define SET(t, f) (t) |= (f) 95 # define CLR(t, f) (t) &= ~(f) 96 # define ISSET(t, f) ((t) & (f)) 97 98 /* 99 * LFS block write function. 100 * 101 * XXX 102 * No write cost accounting is done. 103 * This is almost certainly wrong for synchronous operations and NFS. 104 */ 105 int locked_queue_count = 0; /* XXX Count of locked-down buffers. */ 106 long locked_queue_bytes = 0L; /* XXX Total size of locked buffers. */ 107 int lfs_writing = 0; /* Set if already kicked off a writer 108 because of buffer space */ 109 extern int lfs_dostats; 110 111 /* 112 * Try to reserve some blocks, prior to performing a sensitive operation that 113 * requires the vnode lock to be honored. If there is not enough space, give 114 * up the vnode lock temporarily and wait for the space to become available. 115 * 116 * Called with vp locked. 
(Note nowever that if nb < 0, vp is ignored.) 117 */ 118 int 119 lfs_reserve(struct lfs *fs, struct vnode *vp, int nb) 120 { 121 CLEANERINFO *cip; 122 struct buf *bp; 123 int error, slept; 124 125 slept = 0; 126 while (nb > 0 && !lfs_fits(fs, nb + fs->lfs_ravail)) { 127 VOP_UNLOCK(vp, 0); 128 129 if (!slept) { 130 #ifdef DEBUG 131 printf("lfs_reserve: waiting for %ld (bfree = %d," 132 " est_bfree = %d)\n", 133 nb + fs->lfs_ravail, fs->lfs_bfree, 134 LFS_EST_BFREE(fs)); 135 #endif 136 } 137 ++slept; 138 139 /* Wake up the cleaner */ 140 LFS_CLEANERINFO(cip, fs, bp); 141 LFS_SYNC_CLEANERINFO(cip, fs, bp, 0); 142 wakeup(&lfs_allclean_wakeup); 143 wakeup(&fs->lfs_nextseg); 144 145 error = tsleep(&fs->lfs_avail, PCATCH | PUSER, "lfs_reserve", 146 0); 147 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX use lockstatus */ 148 if (error) 149 return error; 150 } 151 if (slept) 152 printf("lfs_reserve: woke up\n"); 153 fs->lfs_ravail += nb; 154 return 0; 155 } 156 157 /* 158 * 159 * XXX we don't let meta-data writes run out of space because they can 160 * come from the segment writer. We need to make sure that there is 161 * enough space reserved so that there's room to write meta-data 162 * blocks. 163 * 164 * Also, we don't let blocks that have come to us from the cleaner 165 * run out of space. 166 */ 167 #define CANT_WAIT(BP,F) (IS_IFILE((BP)) || (BP)->b_lblkno < 0 || ((F) & BW_CLEAN)) 168 169 int 170 lfs_bwrite(void *v) 171 { 172 struct vop_bwrite_args /* { 173 struct buf *a_bp; 174 } */ *ap = v; 175 struct buf *bp = ap->a_bp; 176 177 #ifdef DIAGNOSTIC 178 if (VTOI(bp->b_vp)->i_lfs->lfs_ronly == 0 && (bp->b_flags & B_ASYNC)) { 179 panic("bawrite LFS buffer"); 180 } 181 #endif /* DIAGNOSTIC */ 182 return lfs_bwrite_ext(bp,0); 183 } 184 185 /* 186 * Determine if there is enough room currently available to write db 187 * disk blocks. 
We need enough blocks for the new blocks, the current 188 * inode blocks, a summary block, plus potentially the ifile inode and 189 * the segment usage table, plus an ifile page. 190 */ 191 int 192 lfs_fits(struct lfs *fs, int fsb) 193 { 194 int needed; 195 196 needed = fsb + btofsb(fs, fs->lfs_sumsize) + 197 fsbtodb(fs, howmany(fs->lfs_uinodes + 1, INOPB(fs)) + 198 fs->lfs_segtabsz + btofsb(fs, fs->lfs_sumsize)); 199 200 if (needed >= fs->lfs_avail) { 201 #ifdef DEBUG 202 printf("lfs_fits: no fit: fsb = %d, uinodes = %d, " 203 "needed = %d, avail = %d\n", 204 fsb, fs->lfs_uinodes, needed, fs->lfs_avail); 205 #endif 206 return 0; 207 } 208 return 1; 209 } 210 211 int 212 lfs_availwait(struct lfs *fs, int db) 213 { 214 int error; 215 CLEANERINFO *cip; 216 struct buf *cbp; 217 218 while (!lfs_fits(fs, db)) { 219 /* 220 * Out of space, need cleaner to run. 221 * Update the cleaner info, then wake it up. 222 * Note the cleanerinfo block is on the ifile 223 * so it CANT_WAIT. 224 */ 225 LFS_CLEANERINFO(cip, fs, cbp); 226 LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0); 227 228 printf("lfs_availwait: out of available space, " 229 "waiting on cleaner\n"); 230 231 wakeup(&lfs_allclean_wakeup); 232 wakeup(&fs->lfs_nextseg); 233 #ifdef DIAGNOSTIC 234 if (fs->lfs_seglock && fs->lfs_lockpid == curproc->p_pid) 235 panic("lfs_availwait: deadlock"); 236 #endif 237 error = tsleep(&fs->lfs_avail, PCATCH | PUSER, "cleaner", 0); 238 if (error) 239 return (error); 240 } 241 return 0; 242 } 243 244 int 245 lfs_bwrite_ext(struct buf *bp, int flags) 246 { 247 struct lfs *fs; 248 struct inode *ip; 249 int fsb, error, s; 250 251 /* 252 * Don't write *any* blocks if we're mounted read-only. 253 * In particular the cleaner can't write blocks either. 
254 */ 255 if (VTOI(bp->b_vp)->i_lfs->lfs_ronly) { 256 bp->b_flags &= ~(B_DELWRI | B_READ | B_ERROR); 257 LFS_UNLOCK_BUF(bp); 258 if (bp->b_flags & B_CALL) 259 bp->b_flags &= ~B_BUSY; 260 else 261 brelse(bp); 262 return EROFS; 263 } 264 265 /* 266 * Set the delayed write flag and use reassignbuf to move the buffer 267 * from the clean list to the dirty one. 268 * 269 * Set the B_LOCKED flag and unlock the buffer, causing brelse to move 270 * the buffer onto the LOCKED free list. This is necessary, otherwise 271 * getnewbuf() would try to reclaim the buffers using bawrite, which 272 * isn't going to work. 273 * 274 * XXX we don't let meta-data writes run out of space because they can 275 * come from the segment writer. We need to make sure that there is 276 * enough space reserved so that there's room to write meta-data 277 * blocks. 278 */ 279 if (!(bp->b_flags & B_LOCKED)) { 280 fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; 281 fsb = fragstofsb(fs, numfrags(fs, bp->b_bcount)); 282 if (!CANT_WAIT(bp, flags)) { 283 if ((error = lfs_availwait(fs, fsb)) != 0) { 284 brelse(bp); 285 return error; 286 } 287 } 288 289 ip = VTOI(bp->b_vp); 290 if (bp->b_flags & B_CALL) { 291 LFS_SET_UINO(ip, IN_CLEANING); 292 } else { 293 LFS_SET_UINO(ip, IN_MODIFIED); 294 if (bp->b_lblkno >= 0) 295 LFS_SET_UINO(ip, IN_UPDATE); 296 } 297 fs->lfs_avail -= fsb; 298 bp->b_flags |= B_DELWRI; 299 300 LFS_LOCK_BUF(bp); 301 bp->b_flags &= ~(B_READ | B_DONE | B_ERROR); 302 s = splbio(); 303 reassignbuf(bp, bp->b_vp); 304 splx(s); 305 } 306 307 if (bp->b_flags & B_CALL) 308 bp->b_flags &= ~B_BUSY; 309 else 310 brelse(bp); 311 312 return (0); 313 } 314 315 void 316 lfs_flush_fs(struct lfs *fs, int flags) 317 { 318 if (fs->lfs_ronly == 0 && fs->lfs_dirops == 0) 319 { 320 /* disallow dirops during flush */ 321 fs->lfs_writer++; 322 323 /* 324 * We set the queue to 0 here because we 325 * are about to write all the dirty 326 * buffers we have. 
If more come in 327 * while we're writing the segment, they 328 * may not get written, so we want the 329 * count to reflect these new writes 330 * after the segwrite completes. 331 */ 332 if (lfs_dostats) 333 ++lfs_stats.flush_invoked; 334 lfs_segwrite(fs->lfs_ivnode->v_mount, flags); 335 336 /* XXX KS - allow dirops again */ 337 if (--fs->lfs_writer == 0) 338 wakeup(&fs->lfs_dirops); 339 } 340 } 341 342 /* 343 * XXX 344 * This routine flushes buffers out of the B_LOCKED queue when LFS has too 345 * many locked down. Eventually the pageout daemon will simply call LFS 346 * when pages need to be reclaimed. Note, we have one static count of locked 347 * buffers, so we can't have more than a single file system. To make this 348 * work for multiple file systems, put the count into the mount structure. 349 */ 350 void 351 lfs_flush(struct lfs *fs, int flags) 352 { 353 struct mount *mp, *nmp; 354 355 if (lfs_dostats) 356 ++lfs_stats.write_exceeded; 357 if (lfs_writing && flags == 0) {/* XXX flags */ 358 #ifdef DEBUG_LFS 359 printf("lfs_flush: not flushing because another flush is active\n"); 360 #endif 361 return; 362 } 363 lfs_writing = 1; 364 365 simple_lock(&mountlist_slock); 366 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 367 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 368 nmp = mp->mnt_list.cqe_next; 369 continue; 370 } 371 if (strncmp(&mp->mnt_stat.f_fstypename[0], MOUNT_LFS, MFSNAMELEN) == 0) 372 lfs_flush_fs(((struct ufsmount *)mp->mnt_data)->ufsmount_u.lfs, flags); 373 simple_lock(&mountlist_slock); 374 nmp = mp->mnt_list.cqe_next; 375 vfs_unbusy(mp); 376 } 377 simple_unlock(&mountlist_slock); 378 379 LFS_DEBUG_COUNTLOCKED("flush"); 380 381 lfs_writing = 0; 382 } 383 384 #define INOCOUNT(fs) howmany((fs)->lfs_uinodes, INOPB(fs)) 385 #define INOBYTES(fs) ((fs)->lfs_uinodes * DINODE_SIZE) 386 387 int 388 lfs_check(struct vnode *vp, ufs_daddr_t blkno, int flags) 389 { 390 int error; 391 struct lfs *fs; 392 struct inode *ip; 393 extern 
int lfs_dirvcount; 394 395 error = 0; 396 ip = VTOI(vp); 397 398 /* If out of buffers, wait on writer */ 399 /* XXX KS - if it's the Ifile, we're probably the cleaner! */ 400 if (ip->i_number == LFS_IFILE_INUM) 401 return 0; 402 /* If we're being called from inside a dirop, don't sleep */ 403 if (ip->i_flag & IN_ADIROP) 404 return 0; 405 406 fs = ip->i_lfs; 407 408 /* 409 * If we would flush below, but dirops are active, sleep. 410 * Note that a dirop cannot ever reach this code! 411 */ 412 while (fs->lfs_dirops > 0 && 413 (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS || 414 locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES || 415 lfs_dirvcount > LFS_MAXDIROP || fs->lfs_diropwait > 0)) 416 { 417 ++fs->lfs_diropwait; 418 tsleep(&fs->lfs_writer, PRIBIO+1, "bufdirop", 0); 419 --fs->lfs_diropwait; 420 } 421 422 if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS || 423 locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES || 424 lfs_dirvcount > LFS_MAXDIROP || fs->lfs_diropwait > 0) 425 { 426 ++fs->lfs_writer; 427 lfs_flush(fs, flags); 428 if (--fs->lfs_writer == 0) 429 wakeup(&fs->lfs_dirops); 430 } 431 432 while (locked_queue_count + INOCOUNT(fs) > LFS_WAIT_BUFS 433 || locked_queue_bytes + INOBYTES(fs) > LFS_WAIT_BYTES) 434 { 435 if (lfs_dostats) 436 ++lfs_stats.wait_exceeded; 437 #ifdef DEBUG_LFS 438 printf("lfs_check: waiting: count=%d, bytes=%ld\n", 439 locked_queue_count, locked_queue_bytes); 440 #endif 441 error = tsleep(&locked_queue_count, PCATCH | PUSER, 442 "buffers", hz * LFS_BUFWAIT); 443 if (error != EWOULDBLOCK) 444 break; 445 /* 446 * lfs_flush might not flush all the buffers, if some of the 447 * inodes were locked or if most of them were Ifile blocks 448 * and we weren't asked to checkpoint. Try flushing again 449 * to keep us from blocking indefinitely. 
450 */ 451 if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS || 452 locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES) 453 { 454 ++fs->lfs_writer; 455 lfs_flush(fs, flags | SEGM_CKP); 456 if (--fs->lfs_writer == 0) 457 wakeup(&fs->lfs_dirops); 458 } 459 } 460 return (error); 461 } 462 463 /* 464 * Allocate a new buffer header. 465 */ 466 #ifdef MALLOCLOG 467 # define DOMALLOC(S, T, F) _malloc((S), (T), (F), file, line) 468 struct buf * 469 lfs_newbuf_malloclog(struct lfs *fs, struct vnode *vp, ufs_daddr_t daddr, size_t size, char *file, int line) 470 #else 471 # define DOMALLOC(S, T, F) malloc((S), (T), (F)) 472 struct buf * 473 lfs_newbuf(struct lfs *fs, struct vnode *vp, ufs_daddr_t daddr, size_t size) 474 #endif 475 { 476 struct buf *bp; 477 size_t nbytes; 478 int s; 479 480 nbytes = roundup(size, fsbtob(fs, 1)); 481 482 bp = DOMALLOC(sizeof(struct buf), M_SEGMENT, M_WAITOK); 483 bzero(bp, sizeof(struct buf)); 484 if (nbytes) { 485 bp->b_data = DOMALLOC(nbytes, M_SEGMENT, M_WAITOK); 486 bzero(bp->b_data, nbytes); 487 } 488 #ifdef DIAGNOSTIC 489 if (vp == NULL) 490 panic("vp is NULL in lfs_newbuf"); 491 if (bp == NULL) 492 panic("bp is NULL after malloc in lfs_newbuf"); 493 #endif 494 s = splbio(); 495 bgetvp(vp, bp); 496 splx(s); 497 498 bp->b_saveaddr = (caddr_t)fs; 499 bp->b_bufsize = size; 500 bp->b_bcount = size; 501 bp->b_lblkno = daddr; 502 bp->b_blkno = daddr; 503 bp->b_error = 0; 504 bp->b_resid = 0; 505 bp->b_iodone = lfs_callback; 506 bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE; 507 508 return (bp); 509 } 510 511 #ifdef MALLOCLOG 512 # define DOFREE(A, T) _free((A), (T), file, line) 513 void 514 lfs_freebuf_malloclog(struct buf *bp, char *file, int line) 515 #else 516 # define DOFREE(A, T) free((A), (T)) 517 void 518 lfs_freebuf(struct buf *bp) 519 #endif 520 { 521 int s; 522 523 s = splbio(); 524 if (bp->b_vp) 525 brelvp(bp); 526 splx(s); 527 if (!(bp->b_flags & B_INVAL)) { /* B_INVAL indicates a "fake" buffer */ 528 DOFREE(bp->b_data, M_SEGMENT); 
529 bp->b_data = NULL; 530 } 531 DOFREE(bp, M_SEGMENT); 532 } 533 534 /* 535 * Definitions for the buffer free lists. 536 */ 537 #define BQUEUES 4 /* number of free buffer queues */ 538 539 #define BQ_LOCKED 0 /* super-blocks &c */ 540 #define BQ_LRU 1 /* lru, useful buffers */ 541 #define BQ_AGE 2 /* rubbish */ 542 #define BQ_EMPTY 3 /* buffer headers with no memory */ 543 544 extern TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; 545 546 /* 547 * Return a count of buffers on the "locked" queue. 548 * Don't count malloced buffers, since they don't detract from the total. 549 */ 550 void 551 lfs_countlocked(int *count, long *bytes, char *msg) 552 { 553 struct buf *bp; 554 int n = 0; 555 long int size = 0L; 556 557 for (bp = bufqueues[BQ_LOCKED].tqh_first; bp; 558 bp = bp->b_freelist.tqe_next) { 559 if (bp->b_flags & B_CALL) /* Malloced buffer */ 560 continue; 561 n++; 562 size += bp->b_bufsize; 563 #ifdef DEBUG_LOCKED_LIST 564 if (n > nbuf) 565 panic("lfs_countlocked: this can't happen: more" 566 " buffers locked than exist"); 567 #endif 568 } 569 #ifdef DEBUG_LOCKED_LIST 570 /* Theoretically this function never really does anything */ 571 if (n != *count) 572 printf("lfs_countlocked: %s: adjusted buf count from %d to %d\n", 573 msg, *count, n); 574 if (size != *bytes) 575 printf("lfs_countlocked: %s: adjusted byte count from %ld to %ld\n", 576 msg, *bytes, size); 577 #endif 578 *count = n; 579 *bytes = size; 580 return; 581 } 582