/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_segment.c	7.35 (Berkeley) 09/03/92
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/mount.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

#define	MAX_ACTIVE	10
/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)

void	 lfs_callback __P((struct buf *));
void	 lfs_gather __P((struct lfs *, struct segment *,
	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
void	 lfs_initseg __P((struct lfs *, struct segment *));
void	 lfs_iset __P((struct inode *, daddr_t, time_t));
int	 lfs_match_data __P((struct lfs *, struct buf *));
int	 lfs_match_dindir __P((struct lfs *, struct buf *));
int	 lfs_match_indir __P((struct lfs *, struct buf *));
int	 lfs_match_tindir __P((struct lfs *, struct buf *));
void	 lfs_newseg __P((struct lfs *));
void	 lfs_shellsort __P((struct buf **, daddr_t *, register int));
void	 lfs_supercallback __P((struct buf *));
void	 lfs_updatemeta __P((struct segment *));
void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
int	 lfs_writeseg __P((struct lfs *, struct segment *));
void	 lfs_writesuper __P((struct lfs *, struct segment *));
void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
	    struct segment *sp, int dirops));

int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */

/*
 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
 * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
 * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
 * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
 */

int
lfs_vflush(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	int error, s;

	fs = VFSTOUFS(vp->v_mount)->um_lfs;
	if (fs->lfs_nactive > MAX_ACTIVE)
		return(lfs_segwrite(vp->v_mount, 1));

	lfs_seglock(fs);

	/*
	 * Allocate a segment structure and enough space to hold pointers to
	 * the maximum possible number of buffers which can be described in a
	 * single summary block.
	 */
	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
	sp->seg_flags = SEGM_CKP;
	sp->vp = NULL;
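
	/*
	 * Note on the sizing above: each block written in a partial segment
	 * is described by one daddr_t in the summary block, so at most
	 * (LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / sizeof(daddr_t) data or
	 * inode blocks fit under a single summary; the extra "+ 1" slot is
	 * for the summary block's own buffer, which lfs_initseg() places at
	 * bpp[0].
	 */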

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	s = splbio();
	++fs->lfs_iocount;
	splx(s);

	ip = VTOI(vp);
	do {
		lfs_initseg(fs, sp);
		do {
			if (vp->v_dirtyblkhd != NULL)
				lfs_writefile(fs, sp, vp);
		} while (lfs_writeinode(fs, sp, ip));

	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
	 * moment, the user's process hangs around so we can sleep.
	 */
	s = splbio();
	if (--fs->lfs_iocount && (error =
	    tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) {
		free(sp->bpp, M_SEGMENT);
		free(sp, M_SEGMENT);
		return (error);
	}
	splx(s);
	lfs_segunlock(fs);

	/*
	 * XXX
	 * Should we be writing a checkpoint?
	 */
	free(sp->bpp, M_SEGMENT);
	free(sp, M_SEGMENT);

	return (0);
}

void
lfs_writevnodes(fs, mp, sp, dirops)
	struct lfs *fs;
	struct mount *mp;
	struct segment *sp;
	int dirops;
{
	struct inode *ip;
	struct vnode *vp;
	int error, s;

loop:	for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		if (dirops && !(vp->v_flag & VDIROP) ||
		    !dirops && (vp->v_flag & VDIROP))
			continue;
		/*
		 * XXX
		 * Up the ref count so we don't get tossed out of
		 * memory.
		 */
		VREF(vp);

		/*
		 * Write the inode/file if dirty and it's not the
		 * IFILE.
		 */
		ip = VTOI(vp);
		if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) ||
		    vp->v_dirtyblkhd != NULL) &&
		    ip->i_number != LFS_IFILE_INUM) {
			if (vp->v_dirtyblkhd != NULL)
				lfs_writefile(fs, sp, vp);
			(void) lfs_writeinode(fs, sp, ip);
		}
		vp->v_flag &= ~VDIROP;
		vrele(vp);
	}
}

int
lfs_segwrite(mp, do_ckp)
	struct mount *mp;
	int do_ckp;			/* Do a checkpoint. */
{
	struct buf *bp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	SEGUSE *segusep;
	daddr_t ibno;
	CLEANERINFO *cip;
	int clean, error, i, s;

	fs = VFSTOUFS(mp)->um_lfs;

	/*
	 * If we have fewer than 2 clean segments, wait until the cleaner
	 * writes some.
	 */
	do {
		LFS_CLEANERINFO(cip, fs, bp);
		clean = cip->clean;
		brelse(bp);
		if (clean <= 2) {
			printf ("segs clean: %d\n", clean);
			wakeup(&lfs_allclean_wakeup);
			if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
			    "lfs writer", 0))
				return (error);
		}
	} while (clean <= 2);
	lfs_seglock(fs);

	/*
	 * Allocate a segment structure and enough space to hold pointers to
	 * the maximum possible number of buffers which can be described in a
	 * single summary block.
	 */
	do_ckp = do_ckp || fs->lfs_nactive > MAX_ACTIVE;
	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
	sp->seg_flags = do_ckp ? SEGM_CKP : 0;
	sp->vp = NULL;
	lfs_initseg(fs, sp);
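
	/*
	 * The write below proceeds in a fixed order: first flush the vnodes
	 * not involved in directory operations, then raise lfs_writer and
	 * wait for in-progress directory operations to drain, flush the
	 * VDIROP vnodes, and finally write the ifile (repeatedly, on a
	 * checkpoint, until its blocks stop moving).
	 */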

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.  If not a checkpoint, we never do the
	 * final decrement, avoiding the wakeup in the callback routine.
	 */
	s = splbio();
	++fs->lfs_iocount;
	splx(s);

	lfs_writevnodes(fs, mp, sp, 0);
	fs->lfs_writer = 1;
	if (fs->lfs_dirops && (error =
	    tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
		free(sp->bpp, M_SEGMENT);
		free(sp, M_SEGMENT);
		fs->lfs_writer = 0;
		return (error);
	}

	lfs_writevnodes(fs, mp, sp, 1);

	/*
	 * If we are doing a checkpoint, mark everything since the
	 * last checkpoint as no longer ACTIVE.
	 */
	if (do_ckp)
		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
		    --ibno >= fs->lfs_cleansz; ) {
			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
			    NOCRED, &bp))
				panic("lfs: ifile read");
			segusep = (SEGUSE *)bp->b_un.b_addr;
			for (i = fs->lfs_sepb; i--; segusep++)
				segusep->su_flags &= ~SEGUSE_ACTIVE;

			error = VOP_BWRITE(bp);
		}

	if (do_ckp || fs->lfs_doifile) {
redo:
		vp = fs->lfs_ivnode;
		while (vget(vp));
		ip = VTOI(vp);
		if (vp->v_dirtyblkhd != NULL)
			lfs_writefile(fs, sp, vp);
		(void)lfs_writeinode(fs, sp, ip);
		vput(vp);
		if (lfs_writeseg(fs, sp) && do_ckp) {
			lfs_initseg(fs, sp);
			goto redo;
		}
	} else
		(void) lfs_writeseg(fs, sp);

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
	 * moment, the user's process hangs around so we can sleep.
	 */
	fs->lfs_writer = 0;
	fs->lfs_doifile = 0;
	wakeup(&fs->lfs_dirops);

	s = splbio();
	--fs->lfs_iocount;
	if (do_ckp) {
		if (fs->lfs_iocount && (error =
		    tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) {
			free(sp->bpp, M_SEGMENT);
			free(sp, M_SEGMENT);
			return (error);
		}
		splx(s);
		fs->lfs_nactive = 0;
		lfs_writesuper(fs, sp);
	} else
		splx(s);

	lfs_segunlock(fs);

	free(sp->bpp, M_SEGMENT);
	free(sp, M_SEGMENT);

	return (0);
}

/*
 * Write the dirty blocks associated with a vnode.
 */
void
lfs_writefile(fs, sp, vp)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
{
	struct buf *bp;
	struct finfo *fip;
	IFILE *ifp;

	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo)) {
		(void) lfs_writeseg(fs, sp);
		lfs_initseg(fs, sp);
	}
	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);

	fip = sp->fip;
	fip->fi_nblocks = 0;
	fip->fi_ino = VTOI(vp)->i_number;
	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
	fip->fi_version = ifp->if_version;
	brelse(bp);
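
	/*
	 * Gather the dirty blocks level by level: file data first, then
	 * single, double, and (optionally) triple indirect blocks.  Each
	 * lfs_gather() pass assigns disk addresses via lfs_updatemeta(),
	 * which dirties the parent indirect blocks, so a later pass picks
	 * those parents up with their children's new addresses in place.
	 */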

	/*
	 * It may not be necessary to write the meta-data blocks at this point,
	 * as the roll-forward recovery code should be able to reconstruct the
	 * list.
	 */
	lfs_gather(fs, sp, vp, lfs_match_data);
	lfs_gather(fs, sp, vp, lfs_match_indir);
	lfs_gather(fs, sp, vp, lfs_match_dindir);
#ifdef TRIPLE
	lfs_gather(fs, sp, vp, lfs_match_tindir);
#endif

	fip = sp->fip;
#ifdef META
	printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
#endif
	if (fip->fi_nblocks != 0) {
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->fip =
		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
		    sizeof(daddr_t) * (fip->fi_nblocks - 1));
		sp->start_lbp = &sp->fip->fi_blocks[0];
	} else
		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
}

int
lfs_writeinode(fs, sp, ip)
	struct lfs *fs;
	struct segment *sp;
	struct inode *ip;
{
	struct buf *bp, *ibp;
	IFILE *ifp;
	SEGUSE *sup;
	daddr_t daddr;
	ino_t ino;
	int error, ndx;
	int redo_ifile = 0;

	if (!(ip->i_flag & (IMOD | IACC | IUPD | ICHG)))
		return(0);

	/* Allocate a new inode block if necessary. */
	if (sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < fs->lfs_bsize ||
		    sp->sum_bytes_left < sizeof(daddr_t)) {
			(void) lfs_writeseg(fs, sp);
			lfs_initseg(fs, sp);
		}

		/* Get next inode block. */
		daddr = fs->lfs_offset;
		fs->lfs_offset += fsbtodb(fs, 1);
		sp->ibp = *sp->cbpp++ =
		    lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
		    fs->lfs_bsize);
		++sp->start_bpp;
		fs->lfs_avail -= fsbtodb(fs, 1);
		/* Set remaining space counters. */
		sp->seg_bytes_left -= fs->lfs_bsize;
		sp->sum_bytes_left -= sizeof(daddr_t);
		ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
		    sp->ninodes / INOPB(fs) - 1;
		((daddr_t *)(sp->segsum))[ndx] = daddr;
	}

	/* Update the inode times and copy the inode onto the inode page. */
	if (ip->i_flag & IMOD)
		--fs->lfs_uinodes;
	ITIMES(ip, &time, &time);
	ip->i_flag &= ~(IMOD | IACC | IUPD | ICHG);
	bp = sp->ibp;
	bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din;
	/* Increment inode count in segment summary block. */
	++((SEGSUM *)(sp->segsum))->ss_ninos;

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address and access times for this inode in the ifile.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM) {
		daddr = fs->lfs_idaddr;
		fs->lfs_idaddr = bp->b_blkno;
	} else {
		LFS_IENTRY(ifp, fs, ino, ibp);
		daddr = ifp->if_daddr;
		ifp->if_daddr = bp->b_blkno;
		error = VOP_BWRITE(ibp);
	}

	/*
	 * No need to update segment usage if there was no former inode address
	 * or if the last inode address is in the current partial segment.
	 */
	if (daddr != LFS_UNUSED_DADDR &&
	    !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
		if (sup->su_nbytes < sizeof(struct dinode)) {
			/* XXX -- Change to a panic. */
			printf("lfs: negative bytes (segment %d)\n",
			    datosn(fs, daddr));
			panic("negative bytes");
		}
#endif
		sup->su_nbytes -= sizeof(struct dinode);
		redo_ifile =
		    (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
		error = VOP_BWRITE(bp);
	}
	return (redo_ifile);
}

int
lfs_gatherblock(sp, bp, sptr)
	struct segment *sp;
	struct buf *bp;
	int *sptr;
{
	struct lfs *fs;
	int version;

	/*
	 * If full, finish this segment.  We may be doing I/O, so
	 * release and reacquire the splbio().
	 */
#ifdef DIAGNOSTIC
	if (sp->vp == NULL)
		panic ("lfs_gatherblock: Null vp in segment");
#endif
	fs = sp->fs;
	if (sp->sum_bytes_left < sizeof(daddr_t) ||
	    sp->seg_bytes_left < fs->lfs_bsize) {
		if (sptr)
			splx(*sptr);
		lfs_updatemeta(sp);

		/* Add the current file to the segment summary. */
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;

		version = sp->fip->fi_version;
		(void) lfs_writeseg(fs, sp);
		lfs_initseg(fs, sp);

		sp->fip->fi_version = version;
		sp->fip->fi_ino = VTOI(sp->vp)->i_number;

		sp->sum_bytes_left -=
		    sizeof(struct finfo) - sizeof(daddr_t);

		if (sptr)
			*sptr = splbio();
		return(1);
	}

	/* Insert into the buffer list, update the FINFO block. */
	bp->b_flags |= B_GATHERED;
	*sp->cbpp++ = bp;
	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;

	sp->sum_bytes_left -= sizeof(daddr_t);
	sp->seg_bytes_left -= bp->b_bufsize;
	return(0);
}

void
lfs_gather(fs, sp, vp, match)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int (*match) __P((struct lfs *, struct buf *));
{
	struct buf *bp;
	int s;

	sp->vp = vp;
	s = splbio();
loop:	for (bp = vp->v_dirtyblkhd; bp; bp = bp->b_blockf) {
		if (bp->b_flags & B_BUSY || !match(fs, bp) ||
		    bp->b_flags & B_GATHERED)
			continue;
#ifdef DIAGNOSTIC
		if (!(bp->b_flags & B_DELWRI))
			panic("lfs_gather: bp not B_DELWRI");
		if (!(bp->b_flags & B_LOCKED))
			panic("lfs_gather: bp not B_LOCKED");
#endif
		if (lfs_gatherblock(sp, bp, &s))
			goto loop;
	}
	splx(s);
	lfs_updatemeta(sp);
	sp->vp = NULL;
}


/*
 * Update the metadata that points to the blocks listed in the FINFO
 * array.
 */
void
lfs_updatemeta(sp)
	struct segment *sp;
{
	SEGUSE *sup;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	INDIR a[NIADDR], *ap;
	struct inode *ip;
	daddr_t daddr, lbn, off;
	int db_per_fsb, error, i, nblocks, num;

	vp = sp->vp;
	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
	if (vp == NULL || nblocks == 0)
		return;

	/* Sort the blocks. */
	if (!(sp->seg_flags & SEGM_CLEAN))
		lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);
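
	/*
	 * Sorting by logical block number means the blocks are assigned disk
	 * addresses in ascending lbn order below, so a file's data tends to
	 * lay out sequentially within the partial segment.  Blocks written
	 * on behalf of the cleaner (SEGM_CLEAN) are left in the order given.
	 */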

	/*
	 * Assign disk addresses, and update references to the logical
	 * block and the segment usage information.
	 */
	fs = sp->fs;
	db_per_fsb = fsbtodb(fs, 1);
	for (i = nblocks; i--; ++sp->start_bpp) {
		lbn = *sp->start_lbp++;
		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
		fs->lfs_offset += db_per_fsb;

		if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num))
			panic("lfs_updatemeta: lfs_bmaparray %d", error);
		ip = VTOI(vp);
		switch (num) {
		case 0:
			ip->i_db[lbn] = off;
			break;
		case 1:
			ip->i_ib[a[0].in_off] = off;
			break;
		default:
			ap = &a[num - 1];
			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_updatemeta: bread bno %d",
				    ap->in_lbn);
			/*
			 * Bread may create a new indirect block which needs
			 * to get counted for the inode.
			 */
			if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
				printf ("Updatemeta allocating indirect block: shouldn't happen\n");
				ip->i_blocks += btodb(fs->lfs_bsize);
				fs->lfs_bfree -= btodb(fs->lfs_bsize);
			}
			bp->b_un.b_daddr[ap->in_off] = off;
			VOP_BWRITE(bp);
		}

		/* Update segment usage information. */
		if (daddr != UNASSIGNED) {
			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
			if (sup->su_nbytes < fs->lfs_bsize) {
				/* XXX -- Change to a panic. */
				printf("lfs: negative bytes (segment %d)\n",
				    datosn(fs, daddr));
				panic ("Negative Bytes");
			}
#endif
			sup->su_nbytes -= fs->lfs_bsize;
			error = VOP_BWRITE(bp);
		}
	}
}

/*
 * Start a new segment.
 */
void
lfs_initseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	SEGUSE *sup;
	SEGSUM *ssp;
	struct buf *bp;
	daddr_t lbn, *lbnp;

	/* Advance to the next segment. */
	if (!LFS_PARTIAL_FITS(fs)) {
		/* Wake up any cleaning procs waiting on this file system. */
		wakeup(&fs->lfs_nextseg);
		wakeup(&lfs_allclean_wakeup);

		lfs_newseg(fs);
		fs->lfs_offset = fs->lfs_curseg;
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;

		/*
		 * If the segment contains a superblock, update the offset
		 * and summary address to skip over it.
		 */
		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
			sp->seg_bytes_left -= LFS_SBPAD;
		}
		brelse(bp);
	} else {
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = (fs->lfs_dbpseg -
		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
	}
	fs->lfs_lastpseg = fs->lfs_offset;

	sp->fs = fs;
	sp->ibp = NULL;
	sp->ninodes = 0;

	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
	sp->cbpp = sp->bpp;
	*sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
	    LFS_SUMMARY_SIZE);
	sp->segsum = (*sp->cbpp)->b_un.b_addr;
	sp->start_bpp = ++sp->cbpp;
	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;

	/* Set pointer to SEGSUM, initialize it. */
	ssp = sp->segsum;
	ssp->ss_next = fs->lfs_nextseg;
	ssp->ss_nfinfo = ssp->ss_ninos = 0;

	/* Set pointer to first FINFO, initialize it. */
	sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
	sp->fip->fi_nblocks = 0;
	sp->start_lbp = &sp->fip->fi_blocks[0];

	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
}
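
/*
 * A partial segment is started within the current segment only when more
 * than one file-system block of room remains (see LFS_PARTIAL_FITS above);
 * otherwise lfs_initseg() advances to the next clean segment via
 * lfs_newseg().
 */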

/*
 * Return the next segment to write.
 */
void
lfs_newseg(fs)
	struct lfs *fs;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	int curseg, error, isdirty, sn;

	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
	sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
	sup->su_nbytes = 0;
	sup->su_nsums = 0;
	sup->su_ninos = 0;
	(void) VOP_BWRITE(bp);

	LFS_CLEANERINFO(cip, fs, bp);
	--cip->clean;
	++cip->dirty;
	(void) VOP_BWRITE(bp);

	fs->lfs_lastseg = fs->lfs_curseg;
	fs->lfs_curseg = fs->lfs_nextseg;
	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
		sn = (sn + 1) % fs->lfs_nseg;
		if (sn == curseg)
			panic("lfs_nextseg: no clean segments");
		LFS_SEGENTRY(sup, fs, sn, bp);
		isdirty = sup->su_flags & SEGUSE_DIRTY;
		brelse(bp);
		if (!isdirty)
			break;
	}

	++fs->lfs_nactive;
	fs->lfs_nextseg = sntoda(fs, sn);
}

int
lfs_writeseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	extern int locked_queue_count;
	struct buf **bpp, *bp, *cbp;
	SEGUSE *sup;
	SEGSUM *ssp;
	dev_t i_dev;
	size_t size;
	u_long *datap, *dp;
	int ch_per_blk, do_again, error, i, nblocks, num, s;
	int (*strategy) __P((struct vop_strategy_args *));
	struct vop_strategy_args vop_strategy_a;
	u_short ninos;
	char *p;

	/*
	 * If there are no buffers other than the segment summary to write
	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
	 * even if there aren't any buffers, you need to write the superblock.
	 */
	if ((nblocks = sp->cbpp - sp->bpp) == 1 && !(sp->seg_flags & SEGM_CKP))
		return (0);

	ssp = (SEGSUM *)sp->segsum;

	/* Update the segment usage information. */
	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
	sup->su_nbytes += (nblocks - 1 - ninos) << fs->lfs_bshift;
	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
	sup->su_nbytes += LFS_SUMMARY_SIZE;
	sup->su_lastmod = time.tv_sec;
	sup->su_ninos += ninos;
	++sup->su_nsums;
	do_again = !(bp->b_flags & B_GATHERED);
	(void)VOP_BWRITE(bp);
	/*
	 * Compute checksum across data and then across summary; the first
	 * block (the summary block) is skipped.  Set the create time here
	 * so that it's guaranteed to be later than the inode mod times.
	 *
	 * XXX
	 * Fix this to do it inline, instead of malloc/copy.
	 */
	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
		if ((*++bpp)->b_flags & B_INVAL) {
			if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
				panic("lfs_writeseg: copyin failed");
		} else
			*dp++ = (*bpp)->b_un.b_words[0];
	}
	ssp->ss_create = time.tv_sec;
	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
	ssp->ss_sumsum =
	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
	free(datap, M_SEGMENT);
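
	/*
	 * Note that ss_datasum above covers only the first u_long of each
	 * data and inode block, not the blocks' full contents; ss_sumsum
	 * then covers the remainder of the summary block starting at
	 * ss_datasum.
	 */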
#ifdef DIAGNOSTIC
	if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
		panic("lfs_writeseg: No diskspace for summary");
#endif
	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/*
	 * When we simply write the blocks we lose a rotation for every block
	 * written.  To avoid this problem, we allocate memory in chunks, copy
	 * the buffers into the chunk and write the chunk.  56K was chosen as
	 * some driver/controllers can't handle unsigned 16 bit transfers.
	 * When the data is copied to the chunk, turn off the B_LOCKED bit
	 * and brelse the buffer (which will move them to the LRU list).  Add
	 * the B_CALL flag to the buffer header so we can count I/O's for the
	 * checkpoints and so we can release the allocated memory.
	 *
	 * XXX
	 * This should be removed if the new virtual memory system allows us to
	 * easily make the buffers contiguous in kernel memory and if that's
	 * fast enough.
	 */
#define	LFS_CHUNKSIZE	(56 * 1024)
	ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize;
	for (bpp = sp->bpp, i = nblocks; i;) {
		num = ch_per_blk;
		if (num > i)
			num = i;
		i -= num;
		size = num * fs->lfs_bsize;

		cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
		    (*bpp)->b_blkno, size);
		cbp->b_dev = i_dev;
		cbp->b_flags |= B_ASYNC | B_BUSY;

		s = splbio();
		++fs->lfs_iocount;
		for (p = cbp->b_un.b_addr; num--;) {
			bp = *bpp++;
			/*
			 * Fake buffers from the cleaner are marked as B_INVAL.
			 * We need to copy the data from user space rather than
			 * from the buffer indicated.
			 * XXX == what do I do on an error?
			 */
			if (bp->b_flags & B_INVAL) {
				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
					panic("lfs_writeseg: copyin failed");
			} else
				bcopy(bp->b_un.b_addr, p, bp->b_bcount);
			p += bp->b_bcount;
			if (bp->b_flags & B_LOCKED)
				--locked_queue_count;
			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
			    B_LOCKED | B_GATHERED);
			if (bp->b_flags & B_CALL) {
				/* if B_CALL, it was created with newbuf */
				brelvp(bp);
				free(bp, M_SEGMENT);
			} else {
				bremfree(bp);
				reassignbuf(bp, bp->b_vp);
				brelse(bp);
			}
		}
		++cbp->b_vp->v_numoutput;
		splx(s);
		cbp->b_bcount = p - cbp->b_un.b_addr;
		/*
		 * XXXX This is a gross and disgusting hack.  Since these
		 * buffers are physically addressed, they hang off the
		 * device vnode (devvp).  As a result, they have no way
		 * of getting to the LFS superblock or lfs structure to
		 * keep track of the number of I/O's pending.  So, I am
		 * going to stuff the fs into the saveaddr field of
		 * the buffer (yuk).
		 */
		cbp->b_saveaddr = (caddr_t)fs;
		vop_strategy_a.a_desc = VDESC(vop_strategy);
		vop_strategy_a.a_bp = cbp;
		(strategy)(&vop_strategy_a);
	}
	return (do_again);
}

void
lfs_writesuper(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	struct buf *bp;
	dev_t i_dev;
	int (*strategy) __P((struct vop_strategy_args *));
	int s;
	struct vop_strategy_args vop_strategy_a;

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/* Checksum the superblock and copy it into a buffer. */
	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
	    LFS_SBPAD);
	*bp->b_un.b_lfs = *fs;
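
	/*
	 * Both superblock copies are written from this one buffer: the copy
	 * at lfs_sboffs[0] synchronously (biowait below), after which
	 * b_blkno is redirected to lfs_sboffs[1] and the second copy goes
	 * out asynchronously, with lfs_supercallback releasing the buffer.
	 */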

	/* Write the first superblock (wait). */
	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY;
	bp->b_flags &= ~(B_DONE | B_CALL | B_ERROR | B_READ | B_DELWRI);
	vop_strategy_a.a_desc = VDESC(vop_strategy);
	vop_strategy_a.a_bp = bp;
	s = splbio();
	bp->b_vp->v_numoutput += 2;
	splx(s);
	(strategy)(&vop_strategy_a);
	biowait(bp);

	/* Write the second superblock (don't wait). */
	bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
	bp->b_flags |= B_CALL | B_ASYNC | B_BUSY;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	bp->b_iodone = lfs_supercallback;
	(strategy)(&vop_strategy_a);
}

/*
 * Logical block number match routines used when traversing the dirty block
 * chain.
 */
int
lfs_match_data(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	return (bp->b_lblkno >= 0);
}

int
lfs_match_indir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
}

int
lfs_match_dindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
}

int
lfs_match_tindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
}

/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(vp, daddr, size)
	struct vnode *vp;
	daddr_t daddr;
	size_t size;
{
	struct buf *bp;
	size_t nbytes;

	nbytes = roundup(size, DEV_BSIZE);
	bp = malloc(sizeof(struct buf) + nbytes, M_SEGMENT, M_WAITOK);
	bzero(bp, sizeof(struct buf) + nbytes);
	bgetvp(vp, bp);
	bp->b_un.b_addr = (caddr_t)(bp + 1);
	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_iodone = lfs_callback;
	bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;
	return (bp);
}

void
lfs_callback(bp)
	struct buf *bp;
{
	struct lfs *fs;

	fs = (struct lfs *)bp->b_saveaddr;
#ifdef DIAGNOSTIC
	if (fs->lfs_iocount == 0)
		panic("lfs_callback: zero iocount\n");
#endif
	if (--fs->lfs_iocount == 0)
		wakeup(&fs->lfs_iocount);

	brelvp(bp);
	free(bp, M_SEGMENT);
}

void
lfs_supercallback(bp)
	struct buf *bp;
{
	brelvp(bp);
	free(bp, M_SEGMENT);
}

/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  Note that we cast the array
 * of logical block numbers to unsigned in this routine so that the
 * negative block numbers (meta data blocks) sort AFTER the data blocks.
 */
void
lfs_shellsort(bp_array, lb_array, nmemb)
	struct buf **bp_array;
	daddr_t *lb_array;
	register int nmemb;
{
	static int __rsshell_increments[] = { 4, 1, 0 };
	register int incr, *incrp, t1, t2;
	struct buf *bp_temp;
	u_long lb_temp;

	for (incrp = __rsshell_increments; incr = *incrp++;)
		for (t1 = incr; t1 < nmemb; ++t1)
			for (t2 = t1 - incr; t2 >= 0;)
				if (lb_array[t2] > lb_array[t2 + incr]) {
					lb_temp = lb_array[t2];
					lb_array[t2] = lb_array[t2 + incr];
					lb_array[t2 + incr] = lb_temp;
					bp_temp = bp_array[t2];
					bp_array[t2] = bp_array[t2 + incr];
					bp_array[t2 + incr] = bp_temp;
					t2 -= incr;
				} else
					break;
}
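
/*
 * Example of the parallel-array sort above: given
 *	lb_array = { 2, 0, 1 }	bp_array = { bp2, bp0, bp1 }
 * lfs_shellsort() leaves
 *	lb_array = { 0, 1, 2 }	bp_array = { bp0, bp1, bp2 }
 * so that lfs_updatemeta() can assign ascending disk addresses to ascending
 * logical blocks while each buffer stays paired with its block number.
 */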