1 /* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)lfs_segment.c 7.33 (Berkeley) 08/28/92 8 */ 9 10 #include <sys/param.h> 11 #include <sys/systm.h> 12 #include <sys/namei.h> 13 #include <sys/kernel.h> 14 #include <sys/resourcevar.h> 15 #include <sys/file.h> 16 #include <sys/stat.h> 17 #include <sys/buf.h> 18 #include <sys/proc.h> 19 #include <sys/conf.h> 20 #include <sys/vnode.h> 21 #include <sys/malloc.h> 22 #include <sys/mount.h> 23 24 #include <miscfs/specfs/specdev.h> 25 #include <miscfs/fifofs/fifo.h> 26 27 #include <ufs/ufs/quota.h> 28 #include <ufs/ufs/inode.h> 29 #include <ufs/ufs/dir.h> 30 #include <ufs/ufs/ufsmount.h> 31 32 #include <ufs/lfs/lfs.h> 33 #include <ufs/lfs/lfs_extern.h> 34 35 #define MAX_ACTIVE 10 36 /* 37 * Determine if it's OK to start a partial in this segment, or if we need 38 * to go on to a new segment. 39 */ 40 #define LFS_PARTIAL_FITS(fs) \ 41 ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \ 42 1 << (fs)->lfs_fsbtodb) 43 44 void lfs_callback __P((struct buf *)); 45 void lfs_gather __P((struct lfs *, struct segment *, 46 struct vnode *, int (*) __P((struct lfs *, struct buf *)))); 47 int lfs_gatherblock __P((struct segment *, struct buf *, int *)); 48 void lfs_initseg __P((struct lfs *, struct segment *)); 49 void lfs_iset __P((struct inode *, daddr_t, time_t)); 50 int lfs_match_data __P((struct lfs *, struct buf *)); 51 int lfs_match_dindir __P((struct lfs *, struct buf *)); 52 int lfs_match_indir __P((struct lfs *, struct buf *)); 53 int lfs_match_tindir __P((struct lfs *, struct buf *)); 54 void lfs_newseg __P((struct lfs *)); 55 void lfs_shellsort __P((struct buf **, daddr_t *, register int)); 56 void lfs_supercallback __P((struct buf *)); 57 void lfs_updatemeta __P((struct segment *)); 58 void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *)); 59 int lfs_writeinode __P((struct lfs *, struct segment *, struct inode 
*)); 60 int lfs_writeseg __P((struct lfs *, struct segment *)); 61 void lfs_writesuper __P((struct lfs *, struct segment *)); 62 void lfs_writevnodes __P((struct lfs *fs, struct mount *mp, 63 struct segment *sp, int dirops)); 64 65 int lfs_allclean_wakeup; /* Cleaner wakeup address. */ 66 67 /* 68 * Ifile and meta data blocks are not marked busy, so segment writes MUST be 69 * single threaded. Currently, there are two paths into lfs_segwrite, sync() 70 * and getnewbuf(). They both mark the file system busy. Lfs_vflush() 71 * explicitly marks the file system busy. So lfs_segwrite is safe. I think. 72 */ 73 74 int 75 lfs_vflush(vp) 76 struct vnode *vp; 77 { 78 struct inode *ip; 79 struct lfs *fs; 80 struct segment *sp; 81 int error, s; 82 83 fs = VFSTOUFS(vp->v_mount)->um_lfs; 84 lfs_seglock(fs); 85 86 /* 87 * Allocate a segment structure and enough space to hold pointers to 88 * the maximum possible number of buffers which can be described in a 89 * single summary block. 90 */ 91 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 92 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 93 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 94 sp->seg_flags = SEGM_CKP; 95 sp->vp = NULL; 96 97 /* 98 * Keep a cumulative count of the outstanding I/O operations. If the 99 * disk drive catches up with us it could go to zero before we finish, 100 * so we artificially increment it by one until we've scheduled all of 101 * the writes we intend to do. 102 */ 103 s = splbio(); 104 ++fs->lfs_iocount; 105 splx(s); 106 107 ip = VTOI(vp); 108 do { 109 lfs_initseg(fs, sp); 110 do { 111 if (vp->v_dirtyblkhd != NULL) 112 lfs_writefile(fs, sp, vp); 113 } while (lfs_writeinode(fs, sp, ip)); 114 115 } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM); 116 117 /* 118 * If the I/O count is non-zero, sleep until it reaches zero. At the 119 * moment, the user's process hangs around so we can sleep. 
120 */ 121 s = splbio(); 122 if (--fs->lfs_iocount && (error = 123 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) { 124 free(sp->bpp, M_SEGMENT); 125 free(sp, M_SEGMENT); 126 return (error); 127 } 128 splx(s); 129 lfs_segunlock(fs); 130 131 /* 132 * XXX 133 * Should be writing a checkpoint? 134 */ 135 free(sp->bpp, M_SEGMENT); 136 free(sp, M_SEGMENT); 137 138 return (0); 139 } 140 141 void 142 lfs_writevnodes(fs, mp, sp, dirops) 143 struct lfs *fs; 144 struct mount *mp; 145 struct segment *sp; 146 int dirops; 147 { 148 struct inode *ip; 149 struct vnode *vp; 150 int error, s; 151 152 loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 153 /* 154 * If the vnode that we are about to sync is no longer 155 * associated with this mount point, start over. 156 */ 157 if (vp->v_mount != mp) 158 goto loop; 159 160 if (dirops && !(vp->v_flag & VDIROP) || 161 !dirops && (vp->v_flag & VDIROP)) 162 continue; 163 /* 164 * XXX 165 * Up the ref count so we don't get tossed out of 166 * memory. 167 */ 168 VREF(vp); 169 170 /* 171 * Write the inode/file if dirty and it's not the 172 * the IFILE. 173 */ 174 ip = VTOI(vp); 175 if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) || 176 vp->v_dirtyblkhd != NULL) && 177 ip->i_number != LFS_IFILE_INUM) { 178 if (vp->v_dirtyblkhd != NULL) 179 lfs_writefile(fs, sp, vp); 180 (void) lfs_writeinode(fs, sp, ip); 181 } 182 vp->v_flag &= ~VDIROP; 183 vrele(vp); 184 } 185 } 186 187 int 188 lfs_segwrite(mp, do_ckp) 189 struct mount *mp; 190 int do_ckp; /* Do a checkpoint. */ 191 { 192 struct buf *bp; 193 struct inode *ip; 194 struct lfs *fs; 195 struct segment *sp; 196 struct vnode *vp; 197 SEGUSE *segusep; 198 daddr_t ibno; 199 CLEANERINFO *cip; 200 int clean, error, i, s; 201 202 fs = VFSTOUFS(mp)->um_lfs; 203 204 /* 205 * If we have fewer than 2 clean segments, wait until cleaner 206 * writes. 
207 */ 208 do { 209 LFS_CLEANERINFO(cip, fs, bp); 210 clean = cip->clean; 211 brelse(bp); 212 if (clean <= 2) { 213 printf ("segs clean: %d\n", clean); 214 wakeup(&lfs_allclean_wakeup); 215 if (error = tsleep(&fs->lfs_avail, PRIBIO + 1, 216 "lfs writer", 0)) 217 return (error); 218 } 219 } while (clean <= 2 ); 220 lfs_seglock(fs); 221 222 /* 223 * Allocate a segment structure and enough space to hold pointers to 224 * the maximum possible number of buffers which can be described in a 225 * single summary block. 226 */ 227 do_ckp = do_ckp || fs->lfs_nactive > MAX_ACTIVE; 228 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 229 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 230 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 231 sp->seg_flags = do_ckp ? SEGM_CKP : 0; 232 sp->vp = NULL; 233 lfs_initseg(fs, sp); 234 235 /* 236 * Keep a cumulative count of the outstanding I/O operations. If the 237 * disk drive catches up with us it could go to zero before we finish, 238 * so we artificially increment it by one until we've scheduled all of 239 * the writes we intend to do. If not a checkpoint, we never do the 240 * final decrement, avoiding the wakeup in the callback routine. 241 */ 242 s = splbio(); 243 ++fs->lfs_iocount; 244 splx(s); 245 246 lfs_writevnodes(fs, mp, sp, 0); 247 fs->lfs_writer = 1; 248 if (fs->lfs_dirops && (error = 249 tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) { 250 free(sp->bpp, M_SEGMENT); 251 free(sp, M_SEGMENT); 252 fs->lfs_writer = 0; 253 return (error); 254 } 255 256 lfs_writevnodes(fs, mp, sp, 1); 257 258 /* 259 * If we are doing a checkpoint, mark everything since the 260 * last checkpoint as no longer ACTIVE. 
261 */ 262 if (do_ckp) 263 for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz; 264 --ibno >= fs->lfs_cleansz; ) { 265 if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize, 266 NOCRED, &bp)) 267 268 panic("lfs: ifile read"); 269 segusep = (SEGUSE *)bp->b_un.b_addr; 270 for (i = fs->lfs_sepb; i--; segusep++) 271 segusep->su_flags &= ~SEGUSE_ACTIVE; 272 273 error = VOP_BWRITE(bp); 274 } 275 276 if (do_ckp || fs->lfs_doifile) { 277 redo: 278 vp = fs->lfs_ivnode; 279 while (vget(vp)); 280 ip = VTOI(vp); 281 if (vp->v_dirtyblkhd != NULL) 282 lfs_writefile(fs, sp, vp); 283 (void)lfs_writeinode(fs, sp, ip); 284 vput(vp); 285 if (lfs_writeseg(fs, sp) && do_ckp) { 286 lfs_initseg(fs, sp); 287 goto redo; 288 } 289 } else 290 (void) lfs_writeseg(fs, sp); 291 292 /* 293 * If the I/O count is non-zero, sleep until it reaches zero. At the 294 * moment, the user's process hangs around so we can sleep. 295 */ 296 fs->lfs_writer = 0; 297 fs->lfs_doifile = 0; 298 wakeup(&fs->lfs_dirops); 299 300 s = splbio(); 301 --fs->lfs_iocount; 302 if (do_ckp) { 303 if (fs->lfs_iocount && (error = 304 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) { 305 free(sp->bpp, M_SEGMENT); 306 free(sp, M_SEGMENT); 307 return (error); 308 } 309 splx(s); 310 fs->lfs_nactive = 0; 311 lfs_writesuper(fs, sp); 312 } else 313 splx(s); 314 315 lfs_segunlock(fs); 316 317 free(sp->bpp, M_SEGMENT); 318 free(sp, M_SEGMENT); 319 320 return (0); 321 } 322 323 /* 324 * Write the dirty blocks associated with a vnode. 
325 */ 326 void 327 lfs_writefile(fs, sp, vp) 328 struct lfs *fs; 329 struct segment *sp; 330 struct vnode *vp; 331 { 332 struct buf *bp; 333 struct finfo *fip; 334 IFILE *ifp; 335 336 if (sp->seg_bytes_left < fs->lfs_bsize || 337 sp->sum_bytes_left < sizeof(struct finfo)) { 338 (void) lfs_writeseg(fs, sp); 339 lfs_initseg(fs, sp); 340 } 341 sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t); 342 343 fip = sp->fip; 344 fip->fi_nblocks = 0; 345 fip->fi_ino = VTOI(vp)->i_number; 346 LFS_IENTRY(ifp, fs, fip->fi_ino, bp); 347 fip->fi_version = ifp->if_version; 348 brelse(bp); 349 350 /* 351 * It may not be necessary to write the meta-data blocks at this point, 352 * as the roll-forward recovery code should be able to reconstruct the 353 * list. 354 */ 355 lfs_gather(fs, sp, vp, lfs_match_data); 356 lfs_gather(fs, sp, vp, lfs_match_indir); 357 lfs_gather(fs, sp, vp, lfs_match_dindir); 358 #ifdef TRIPLE 359 lfs_gather(fs, sp, vp, lfs_match_tindir); 360 #endif 361 362 fip = sp->fip; 363 #ifdef META 364 printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks); 365 #endif 366 if (fip->fi_nblocks != 0) { 367 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 368 sp->fip = 369 (struct finfo *)((caddr_t)fip + sizeof(struct finfo) + 370 sizeof(daddr_t) * (fip->fi_nblocks - 1)); 371 sp->start_lbp = &sp->fip->fi_blocks[0]; 372 } else 373 sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t); 374 } 375 376 int 377 lfs_writeinode(fs, sp, ip) 378 struct lfs *fs; 379 struct segment *sp; 380 struct inode *ip; 381 { 382 struct buf *bp, *ibp; 383 IFILE *ifp; 384 SEGUSE *sup; 385 daddr_t daddr; 386 ino_t ino; 387 int error, ndx; 388 int redo_ifile = 0; 389 390 if (!(ip->i_flag & (IMOD | IACC | IUPD | ICHG))) 391 return; 392 393 /* Allocate a new inode block if necessary. */ 394 if (sp->ibp == NULL) { 395 /* Allocate a new segment if necessary. 
*/ 396 if (sp->seg_bytes_left < fs->lfs_bsize || 397 sp->sum_bytes_left < sizeof(daddr_t)) { 398 (void) lfs_writeseg(fs, sp); 399 lfs_initseg(fs, sp); 400 } 401 402 /* Get next inode block. */ 403 daddr = fs->lfs_offset; 404 fs->lfs_offset += fsbtodb(fs, 1); 405 sp->ibp = *sp->cbpp++ = 406 lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr, 407 fs->lfs_bsize); 408 ++sp->start_bpp; 409 fs->lfs_avail -= fsbtodb(fs, 1); 410 /* Set remaining space counters. */ 411 sp->seg_bytes_left -= fs->lfs_bsize; 412 sp->sum_bytes_left -= sizeof(daddr_t); 413 ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - 414 sp->ninodes / INOPB(fs) - 1; 415 ((daddr_t *)(sp->segsum))[ndx] = daddr; 416 } 417 418 /* Update the inode times and copy the inode onto the inode page. */ 419 if (ip->i_flag & IMOD) 420 --fs->lfs_uinodes; 421 ITIMES(ip, &time, &time); 422 ip->i_flag &= ~(IMOD | IACC | IUPD | ICHG); 423 bp = sp->ibp; 424 bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din; 425 /* Increment inode count in segment summary block. */ 426 ++((SEGSUM *)(sp->segsum))->ss_ninos; 427 428 /* If this page is full, set flag to allocate a new page. */ 429 if (++sp->ninodes % INOPB(fs) == 0) 430 sp->ibp = NULL; 431 432 /* 433 * If updating the ifile, update the super-block. Update the disk 434 * address and access times for this inode in the ifile. 435 */ 436 ino = ip->i_number; 437 if (ino == LFS_IFILE_INUM) { 438 daddr = fs->lfs_idaddr; 439 fs->lfs_idaddr = bp->b_blkno; 440 } else { 441 LFS_IENTRY(ifp, fs, ino, ibp); 442 daddr = ifp->if_daddr; 443 ifp->if_daddr = bp->b_blkno; 444 error = VOP_BWRITE(ibp); 445 } 446 447 /* 448 * No need to update segment usage if there was no former inode address 449 * or if the last inode address is in the current partial segment. 
450 */ 451 if (daddr != LFS_UNUSED_DADDR && 452 !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) { 453 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 454 #ifdef DIAGNOSTIC 455 if (sup->su_nbytes < sizeof(struct dinode)) { 456 /* XXX -- Change to a panic. */ 457 printf("lfs: negative bytes (segment %d)\n", 458 datosn(fs, daddr)); 459 panic("negative bytes"); 460 } 461 #endif 462 sup->su_nbytes -= sizeof(struct dinode); 463 redo_ifile = 464 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 465 error = VOP_BWRITE(bp); 466 } 467 return (redo_ifile); 468 } 469 470 int 471 lfs_gatherblock(sp, bp, sptr) 472 struct segment *sp; 473 struct buf *bp; 474 int *sptr; 475 { 476 struct lfs *fs; 477 int version; 478 479 /* 480 * If full, finish this segment. We may be doing I/O, so 481 * release and reacquire the splbio(). 482 */ 483 #ifdef DIAGNOSTIC 484 if (sp->vp == NULL) 485 panic ("lfs_gatherblock: Null vp in segment"); 486 #endif 487 fs = sp->fs; 488 if (sp->sum_bytes_left < sizeof(daddr_t) || 489 sp->seg_bytes_left < fs->lfs_bsize) { 490 if (sptr) 491 splx(*sptr); 492 lfs_updatemeta(sp); 493 494 /* Add the current file to the segment summary. */ 495 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 496 497 version = sp->fip->fi_version; 498 (void) lfs_writeseg(fs, sp); 499 lfs_initseg(fs, sp); 500 501 sp->fip->fi_version = version; 502 sp->fip->fi_ino = VTOI(sp->vp)->i_number; 503 504 sp->sum_bytes_left -= 505 sizeof(struct finfo) - sizeof(daddr_t); 506 507 if (sptr) 508 *sptr = splbio(); 509 return(1); 510 } 511 512 /* Insert into the buffer list, update the FINFO block. 
*/ 513 if (bp->b_vp == sp->fs->lfs_ivnode && 514 ((bp->b_lblkno == 0 && (bp->b_un.b_daddr[0] > 26 || bp->b_un.b_daddr[1] > 26)) || 515 (bp->b_lblkno > 2))) 516 printf ("Bad ifile block\n"); 517 bp->b_flags |= B_GATHERED; 518 *sp->cbpp++ = bp; 519 sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno; 520 521 sp->sum_bytes_left -= sizeof(daddr_t); 522 sp->seg_bytes_left -= bp->b_bufsize; 523 return(0); 524 } 525 526 void 527 lfs_gather(fs, sp, vp, match) 528 struct lfs *fs; 529 struct segment *sp; 530 struct vnode *vp; 531 int (*match) __P((struct lfs *, struct buf *)); 532 { 533 struct buf *bp; 534 int s; 535 536 sp->vp = vp; 537 s = splbio(); 538 loop: for (bp = vp->v_dirtyblkhd; bp; bp = bp->b_blockf) { 539 if (bp->b_flags & B_BUSY || !match(fs, bp) || 540 bp->b_flags & B_GATHERED) 541 continue; 542 #ifdef DIAGNOSTIC 543 if (!(bp->b_flags & B_DELWRI)) 544 panic("lfs_gather: bp not B_DELWRI"); 545 if (!(bp->b_flags & B_LOCKED)) 546 panic("lfs_gather: bp not B_LOCKED"); 547 #endif 548 if (lfs_gatherblock(sp, bp, &s)) 549 goto loop; 550 } 551 splx(s); 552 lfs_updatemeta(sp); 553 sp->vp = NULL; 554 } 555 556 557 /* 558 * Update the metadata that points to the blocks listed in the FINFO 559 * array. 560 */ 561 void 562 lfs_updatemeta(sp) 563 struct segment *sp; 564 { 565 SEGUSE *sup; 566 struct buf *bp; 567 struct lfs *fs; 568 struct vnode *vp; 569 INDIR a[NIADDR], *ap; 570 struct inode *ip; 571 daddr_t daddr, lbn, off; 572 int db_per_fsb, error, i, nblocks, num; 573 574 vp = sp->vp; 575 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; 576 if (vp == NULL || nblocks == 0) 577 return; 578 579 /* Sort the blocks. */ 580 if (!(sp->seg_flags & SEGM_CLEAN)) 581 lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks); 582 583 /* 584 * Assign disk addresses, and update references to the logical 585 * block and the segment usage information. 
586 */ 587 fs = sp->fs; 588 db_per_fsb = fsbtodb(fs, 1); 589 for (i = nblocks; i--; ++sp->start_bpp) { 590 lbn = *sp->start_lbp++; 591 (*sp->start_bpp)->b_blkno = off = fs->lfs_offset; 592 fs->lfs_offset += db_per_fsb; 593 594 if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num)) 595 panic("lfs_updatemeta: lfs_bmaparray %d", error); 596 ip = VTOI(vp); 597 switch (num) { 598 case 0: 599 ip->i_db[lbn] = off; 600 break; 601 case 1: 602 ip->i_ib[a[0].in_off] = off; 603 break; 604 default: 605 ap = &a[num - 1]; 606 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 607 panic("lfs_updatemeta: bread bno %d", 608 ap->in_lbn); 609 /* 610 * Bread may create a new indirect block which needs 611 * to get counted for the inode. 612 */ 613 if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) { 614 printf ("Updatemeta allocating indirect block: shouldn't happen\n"); 615 ip->i_blocks += btodb(fs->lfs_bsize); 616 fs->lfs_bfree -= btodb(fs->lfs_bsize); 617 } 618 bp->b_un.b_daddr[ap->in_off] = off; 619 VOP_BWRITE(bp); 620 } 621 622 /* Update segment usage information. */ 623 if (daddr != UNASSIGNED) { 624 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 625 #ifdef DIAGNOSTIC 626 if (sup->su_nbytes < fs->lfs_bsize) { 627 /* XXX -- Change to a panic. */ 628 printf("lfs: negative bytes (segment %d)\n", 629 datosn(fs, daddr)); 630 panic ("Negative Bytes"); 631 } 632 #endif 633 sup->su_nbytes -= fs->lfs_bsize; 634 error = VOP_BWRITE(bp); 635 } 636 } 637 } 638 639 /* 640 * Start a new segment. 641 */ 642 void 643 lfs_initseg(fs, sp) 644 struct lfs *fs; 645 struct segment *sp; 646 { 647 SEGUSE *sup; 648 SEGSUM *ssp; 649 struct buf *bp; 650 daddr_t lbn, *lbnp; 651 652 /* Advance to the next segment. */ 653 if (!LFS_PARTIAL_FITS(fs)) { 654 /* Wake up any cleaning procs waiting on this file system. 
*/ 655 wakeup(&fs->lfs_nextseg); 656 wakeup(&lfs_allclean_wakeup); 657 658 lfs_newseg(fs); 659 fs->lfs_offset = fs->lfs_curseg; 660 sp->seg_number = datosn(fs, fs->lfs_curseg); 661 sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; 662 663 /* 664 * If the segment contains a superblock, update the offset 665 * and summary address to skip over it. 666 */ 667 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 668 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 669 fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; 670 sp->seg_bytes_left -= LFS_SBPAD; 671 } 672 brelse(bp); 673 } else { 674 sp->seg_number = datosn(fs, fs->lfs_curseg); 675 sp->seg_bytes_left = (fs->lfs_dbpseg - 676 (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; 677 } 678 fs->lfs_lastpseg = fs->lfs_offset; 679 680 sp->fs = fs; 681 sp->ibp = NULL; 682 sp->ninodes = 0; 683 684 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 685 sp->cbpp = sp->bpp; 686 *sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset, 687 LFS_SUMMARY_SIZE); 688 sp->segsum = (*sp->cbpp)->b_un.b_addr; 689 sp->start_bpp = ++sp->cbpp; 690 fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; 691 692 /* Set point to SEGSUM, initialize it. */ 693 ssp = sp->segsum; 694 ssp->ss_next = fs->lfs_nextseg; 695 ssp->ss_nfinfo = ssp->ss_ninos = 0; 696 697 /* Set pointer to first FINFO, initialize it. */ 698 sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM)); 699 sp->fip->fi_nblocks = 0; 700 sp->start_lbp = &sp->fip->fi_blocks[0]; 701 702 sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 703 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 704 } 705 706 /* 707 * Return the next segment to write. 
 */
void
lfs_newseg(fs)
	struct lfs *fs;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	int curseg, error, isdirty, sn;

	/* Mark the segment we are moving into dirty and empty. */
	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
	sup->su_flags |= SEGUSE_DIRTY;
	sup->su_nbytes = 0;
	sup->su_nsums = 0;
	sup->su_ninos = 0;
	(void) VOP_BWRITE(bp);

	/* Account for one fewer clean segment in the cleaner info block. */
	LFS_CLEANERINFO(cip, fs, bp);
	--cip->clean;
	++cip->dirty;
	(void) VOP_BWRITE(bp);

	/* Advance: nextseg becomes the current segment. */
	fs->lfs_lastseg = fs->lfs_curseg;
	fs->lfs_curseg = fs->lfs_nextseg;
	/* Scan circularly for the next clean segment; panic if none. */
	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
		sn = (sn + 1) % fs->lfs_nseg;
		if (sn == curseg)
			panic("lfs_nextseg: no clean segments");
		LFS_SEGENTRY(sup, fs, sn, bp);
		isdirty = sup->su_flags & SEGUSE_DIRTY;
		brelse(bp);
		if (!isdirty)
			break;
	}

	++fs->lfs_nactive;
	fs->lfs_nextseg = sntoda(fs, sn);
}

/*
 * Write the current partial segment (summary block, inode blocks, and
 * gathered data blocks) to disk.  Computes the data and summary
 * checksums, updates the segment-usage record, then copies the buffers
 * into large contiguous chunks and hands them to the device strategy
 * routine.  Returns non-zero if an ifile block was dirtied after being
 * gathered, in which case the caller must write the ifile again.
 */
int
lfs_writeseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	extern int locked_queue_count;
	struct buf **bpp, *bp, *cbp;
	SEGUSE *sup;
	SEGSUM *ssp;
	dev_t i_dev;
	size_t size;
	u_long *datap, *dp;
	int ch_per_blk, do_again, error, i, nblocks, num, s;
	int (*strategy)__P((struct vop_strategy_args *));
	struct vop_strategy_args vop_strategy_a;
	u_short ninos;
	char *p;

	/*
	 * If there are no buffers other than the segment summary to write
	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
	 * even if there aren't any buffers, you need to write the superblock.
	 */
	if ((nblocks = sp->cbpp - sp->bpp) == 1 && !(sp->seg_flags & SEGM_CKP))
		return (0);

	/*
	 * Compute checksum across data and then across summary; the first
	 * block (the summary block) is skipped.  Set the create time here
	 * so that it's guaranteed to be later than the inode mod times.
	 *
	 * XXX
	 * Fix this to do it inline, instead of malloc/copy.
	 */
	/* One word from the start of each block feeds the data checksum. */
	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
	for (bpp = sp->bpp, i = nblocks - 1; i--;)
		*dp++ = (*++bpp)->b_un.b_words[0];
	ssp = (SEGSUM *)sp->segsum;
	ssp->ss_create = time.tv_sec;
	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
	/* Summary checksum covers everything after the ss_sumsum field. */
	ssp->ss_sumsum =
	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
	free(datap, M_SEGMENT);
	/* Update the segment usage information. */
	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
	/* Number of whole blocks occupied by inodes (rounded up). */
	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
	/*
	 * Note: "<<" binds more loosely than "-", so this shifts the whole
	 * data-block count (nblocks - 1 - ninos) into bytes, as intended.
	 */
	sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift;
	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
	sup->su_nbytes += LFS_SUMMARY_SIZE;
	sup->su_lastmod = time.tv_sec;
	sup->su_flags |= SEGUSE_ACTIVE;
	sup->su_ninos += ninos;
	++sup->su_nsums;
	/*
	 * If this segment-usage block was not gathered into the segment,
	 * the ifile copy just written is already stale: tell the caller
	 * to redo the ifile.
	 */
	do_again = !(bp->b_flags & B_GATHERED);
	(void)VOP_BWRITE(bp);
	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/*
	 * When we simply write the blocks we lose a rotation for every block
	 * written.  To avoid this problem, we allocate memory in chunks, copy
	 * the buffers into the chunk and write the chunk.  56K was chosen as
	 * some driver/controllers can't handle unsigned 16 bit transfers.
	 * When the data is copied to the chunk, turn off the the B_LOCKED bit
	 * and brelse the buffer (which will move them to the LRU list).  Add
	 * the B_CALL flag to the buffer header so we can count I/O's for the
	 * checkpoints and so we can release the allocated memory.
	 *
	 * XXX
	 * This should be removed if the new virtual memory system allows us to
	 * easily make the buffers contiguous in kernel memory and if that's
	 * fast enough.
	 */
#define LFS_CHUNKSIZE	(56 * 1024)
	ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize;
	for (bpp = sp->bpp, i = nblocks; i;) {
		/* Take up to ch_per_blk blocks per chunk. */
		num = ch_per_blk;
		if (num > i)
			num = i;
		i -= num;
		size = num * fs->lfs_bsize;

		cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
		    (*bpp)->b_blkno, size);
		cbp->b_dev = i_dev;
		cbp->b_flags |= B_ASYNC | B_BUSY;

		s = splbio();
		++fs->lfs_iocount;
		for (p = cbp->b_un.b_addr; num--;) {
			bp = *bpp++;
			/*
			 * Fake buffers from the cleaner are marked as B_INVAL.
			 * We need to copy the data from user space rather than
			 * from the buffer indicated.
			 * XXX == what do I do on an error?
			 */
			if (bp->b_flags & B_INVAL) {
				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
					panic("lfs_writeseg: copyin failed");
			} else
				bcopy(bp->b_un.b_addr, p, bp->b_bcount);
			p += bp->b_bcount;
			/* The buffer's data is now staged; release it. */
			if (bp->b_flags & B_LOCKED)
				--locked_queue_count;
			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
			     B_LOCKED | B_GATHERED);
			if (bp->b_flags & B_CALL) {
				/* if B_CALL, it was created with newbuf */
				brelvp(bp);
				free(bp, M_SEGMENT);
			} else {
				bremfree(bp);
				reassignbuf(bp, bp->b_vp);
				brelse(bp);
			}
		}
		++cbp->b_vp->v_numoutput;
		splx(s);
		cbp->b_bcount = p - cbp->b_un.b_addr;
		/*
		 * XXXX This is a gross and disgusting hack.  Since these
		 * buffers are physically addressed, they hang off the
		 * device vnode (devvp).  As a result, they have no way
		 * of getting to the LFS superblock or lfs structure to
		 * keep track of the number of I/O's pending.  So, I am
		 * going to stuff the fs into the saveaddr field of
		 * the buffer (yuk).
		 */
		cbp->b_saveaddr = (caddr_t)fs;
		vop_strategy_a.a_desc = VDESC(vop_strategy);
		vop_strategy_a.a_bp = cbp;
		(strategy)(&vop_strategy_a);
	}
	return (do_again);
}

/*
 * Write both copies of the superblock: the first synchronously (biowait),
 * the second asynchronously, reusing the same buffer with the
 * lfs_supercallback completion to free it.
 */
void
lfs_writesuper(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	struct buf *bp;
	dev_t i_dev;
	int (*strategy) __P((struct vop_strategy_args *));
	int s;
	struct vop_strategy_args vop_strategy_a;

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/* Checksum the superblock and copy it into a buffer. */
	/* The checksum covers everything before the trailing cksum field. */
	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
	    LFS_SBPAD);
	*bp->b_un.b_lfs = *fs;

	/* Write the first superblock (wait). */
	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY;
	bp->b_flags &= ~(B_DONE | B_CALL | B_ERROR | B_READ | B_DELWRI);
	vop_strategy_a.a_desc = VDESC(vop_strategy);
	vop_strategy_a.a_bp = bp;
	s = splbio();
	/* Count both writes up front, since the buffer is reused. */
	bp->b_vp->v_numoutput += 2;
	splx(s);
	(strategy)(&vop_strategy_a);
	/* NOTE(review): any error from biowait() is ignored here. */
	biowait(bp);

	/* Write the second superblock (don't wait). */
	bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
	bp->b_flags |= B_CALL | B_ASYNC | B_BUSY;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	bp->b_iodone = lfs_supercallback;
	(strategy)(&vop_strategy_a);
}

/*
 * Logical block number match routines used when traversing the dirty block
 * chain.
929 */ 930 int 931 lfs_match_data(fs, bp) 932 struct lfs *fs; 933 struct buf *bp; 934 { 935 return (bp->b_lblkno >= 0); 936 } 937 938 int 939 lfs_match_indir(fs, bp) 940 struct lfs *fs; 941 struct buf *bp; 942 { 943 int lbn; 944 945 lbn = bp->b_lblkno; 946 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 947 } 948 949 int 950 lfs_match_dindir(fs, bp) 951 struct lfs *fs; 952 struct buf *bp; 953 { 954 int lbn; 955 956 lbn = bp->b_lblkno; 957 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 958 } 959 960 int 961 lfs_match_tindir(fs, bp) 962 struct lfs *fs; 963 struct buf *bp; 964 { 965 int lbn; 966 967 lbn = bp->b_lblkno; 968 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 969 } 970 971 /* 972 * Allocate a new buffer header. 973 */ 974 struct buf * 975 lfs_newbuf(vp, daddr, size) 976 struct vnode *vp; 977 daddr_t daddr; 978 size_t size; 979 { 980 struct buf *bp; 981 size_t nbytes; 982 983 nbytes = roundup(size, DEV_BSIZE); 984 bp = malloc(sizeof(struct buf) + nbytes, M_SEGMENT, M_WAITOK); 985 bzero(bp, sizeof(struct buf) + nbytes); 986 bgetvp(vp, bp); 987 bp->b_un.b_addr = (caddr_t)(bp + 1); 988 bp->b_bufsize = size; 989 bp->b_bcount = size; 990 bp->b_lblkno = daddr; 991 bp->b_blkno = daddr; 992 bp->b_error = 0; 993 bp->b_resid = 0; 994 bp->b_iodone = lfs_callback; 995 bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE; 996 return (bp); 997 } 998 999 void 1000 lfs_callback(bp) 1001 struct buf *bp; 1002 { 1003 struct lfs *fs; 1004 1005 fs = (struct lfs *)bp->b_saveaddr; 1006 #ifdef DIAGNOSTIC 1007 if (fs->lfs_iocount == 0) 1008 panic("lfs_callback: zero iocount\n"); 1009 #endif 1010 if (--fs->lfs_iocount == 0) 1011 wakeup(&fs->lfs_iocount); 1012 1013 brelvp(bp); 1014 free(bp, M_SEGMENT); 1015 } 1016 1017 void 1018 lfs_supercallback(bp) 1019 struct buf *bp; 1020 { 1021 brelvp(bp); 1022 free(bp, M_SEGMENT); 1023 } 1024 1025 /* 1026 * Shellsort (diminishing increment sort) from Data Structures and 1027 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 
290; 1028 * see also Knuth Vol. 3, page 84. The increments are selected from 1029 * formula (8), page 95. Roughly O(N^3/2). 1030 */ 1031 /* 1032 * This is our own private copy of shellsort because we want to sort 1033 * two parallel arrays (the array of buffer pointers and the array of 1034 * logical block numbers) simultaneously. Note that we cast the array 1035 * of logical block numbers to a unsigned in this routine so that the 1036 * negative block numbers (meta data blocks) sort AFTER the data blocks. 1037 */ 1038 void 1039 lfs_shellsort(bp_array, lb_array, nmemb) 1040 struct buf **bp_array; 1041 daddr_t *lb_array; 1042 register int nmemb; 1043 { 1044 static int __rsshell_increments[] = { 4, 1, 0 }; 1045 register int incr, *incrp, t1, t2; 1046 struct buf *bp_temp; 1047 u_long lb_temp; 1048 1049 for (incrp = __rsshell_increments; incr = *incrp++;) 1050 for (t1 = incr; t1 < nmemb; ++t1) 1051 for (t2 = t1 - incr; t2 >= 0;) 1052 if (lb_array[t2] > lb_array[t2 + incr]) { 1053 lb_temp = lb_array[t2]; 1054 lb_array[t2] = lb_array[t2 + incr]; 1055 lb_array[t2 + incr] = lb_temp; 1056 bp_temp = bp_array[t2]; 1057 bp_array[t2] = bp_array[t2 + incr]; 1058 bp_array[t2 + incr] = bp_temp; 1059 t2 -= incr; 1060 } else 1061 break; 1062 } 1063 1064