/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_segment.c	7.37 (Berkeley) 10/24/92
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/mount.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

#define	MAX_ACTIVE	10
/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)

void	 lfs_callback __P((struct buf *));
void	 lfs_gather __P((struct lfs *, struct segment *,
	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
int	 lfs_gatherblock __P((struct segment *, struct buf *, int *));
void	 lfs_initseg __P((struct lfs *, struct segment *));
void	 lfs_iset __P((struct inode *, daddr_t, time_t));
int	 lfs_match_data __P((struct lfs *, struct buf *));
int	 lfs_match_dindir __P((struct lfs *, struct buf *));
int	 lfs_match_indir __P((struct lfs *, struct buf *));
int	 lfs_match_tindir __P((struct lfs *, struct buf *));
void	 lfs_newseg __P((struct lfs *));
void	 lfs_shellsort __P((struct buf **, daddr_t *, register int));
void	 lfs_supercallback __P((struct buf *));
void	 lfs_updatemeta __P((struct segment *));
void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
int	 lfs_writeseg __P((struct lfs *, struct segment *));
void	 lfs_writesuper __P((struct lfs *, struct segment *));
void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
	    struct segment *sp, int dirops));

int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */

/*
 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
 * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
 * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
 * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
 */

int
lfs_vflush(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	int error, s;

	fs = VFSTOUFS(vp->v_mount)->um_lfs;
	if (fs->lfs_nactive > MAX_ACTIVE)
		return (lfs_segwrite(vp->v_mount, 1));

	lfs_seglock(fs);

	/*
	 * Allocate a segment structure and enough space to hold pointers to
	 * the maximum possible number of buffers which can be described in a
	 * single summary block.
	 */
	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
	sp->seg_flags = SEGM_CKP;
	sp->vp = NULL;
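
	/*
	 * Worked out, the allocation above: every block written is named
	 * by one daddr_t in the summary, so a single summary block can
	 * describe at most (LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
	 * sizeof(daddr_t) blocks; the "+ 1" leaves room for the summary
	 * buffer itself at bpp[0].  E.g., with a 512-byte summary and
	 * 4-byte disk addresses this is on the order of a hundred buffer
	 * pointers per partial segment.
	 */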

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	s = splbio();
	++fs->lfs_iocount;
	splx(s);

	ip = VTOI(vp);
	do {
		lfs_initseg(fs, sp);
		do {
			if (vp->v_dirtyblkhd.le_next != NULL)
				lfs_writefile(fs, sp, vp);
		} while (lfs_writeinode(fs, sp, ip));

	} while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM);

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
	 * moment, the user's process hangs around so we can sleep.
	 */
	s = splbio();
	if (--fs->lfs_iocount && (error =
	    tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) {
		free(sp->bpp, M_SEGMENT);
		free(sp, M_SEGMENT);
		return (error);
	}
	splx(s);
	lfs_segunlock(fs);

	/*
	 * XXX
	 * Should be writing a checkpoint?
	 */
	free(sp->bpp, M_SEGMENT);
	free(sp, M_SEGMENT);

	return (0);
}

void
lfs_writevnodes(fs, mp, sp, dirops)
	struct lfs *fs;
	struct mount *mp;
	struct segment *sp;
	int dirops;
{
	struct inode *ip;
	struct vnode *vp;
	int error, s;

loop:	for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		if (dirops && !(vp->v_flag & VDIROP) ||
		    !dirops && (vp->v_flag & VDIROP))
			continue;
		/*
		 * XXX
		 * Up the ref count so we don't get tossed out of
		 * memory.
		 */
		VREF(vp);

		/*
		 * Write the inode/file if dirty and it's not the
		 * IFILE.
		 */
		ip = VTOI(vp);
		if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) ||
		    vp->v_dirtyblkhd.le_next != NULL) &&
		    ip->i_number != LFS_IFILE_INUM) {
			if (vp->v_dirtyblkhd.le_next != NULL)
				lfs_writefile(fs, sp, vp);
			(void) lfs_writeinode(fs, sp, ip);
		}
		vp->v_flag &= ~VDIROP;
		vrele(vp);
	}
}
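
/*
 * Note on ordering: lfs_segwrite below calls this routine twice, first
 * with dirops == 0 and then with dirops == 1, so vnodes involved in
 * directory operations are flushed only after in-progress dirops have
 * drained (see the lfs_writer/lfs_dirops handshake in lfs_segwrite).
 * The intent, presumably, is that a directory operation appears in the
 * log all at once rather than in a partially updated state.
 */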

int
lfs_segwrite(mp, do_ckp)
	struct mount *mp;
	int do_ckp;			/* Do a checkpoint. */
{
	struct buf *bp;
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	SEGUSE *segusep;
	daddr_t ibno;
	CLEANERINFO *cip;
	int clean, error, i, s;

	fs = VFSTOUFS(mp)->um_lfs;

	/*
	 * If we have fewer than 2 clean segments, wait until cleaner
	 * writes.
	 */
	do {
		LFS_CLEANERINFO(cip, fs, bp);
		clean = cip->clean;
		brelse(bp);
		if (clean <= 2) {
			printf("segs clean: %d\n", clean);
			wakeup(&lfs_allclean_wakeup);
			if (error = tsleep(&fs->lfs_avail, PRIBIO + 1,
			    "lfs writer", 0))
				return (error);
		}
	} while (clean <= 2);
	lfs_seglock(fs);

	/*
	 * Allocate a segment structure and enough space to hold pointers to
	 * the maximum possible number of buffers which can be described in a
	 * single summary block.
	 */
	do_ckp = do_ckp || fs->lfs_nactive > MAX_ACTIVE;
	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
	sp->seg_flags = do_ckp ? SEGM_CKP : 0;
	sp->vp = NULL;
	lfs_initseg(fs, sp);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.  If not a checkpoint, we never do the
	 * final decrement, avoiding the wakeup in the callback routine.
	 */
	s = splbio();
	++fs->lfs_iocount;
	splx(s);

	lfs_writevnodes(fs, mp, sp, 0);
	fs->lfs_writer = 1;
	if (fs->lfs_dirops && (error =
	    tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) {
		free(sp->bpp, M_SEGMENT);
		free(sp, M_SEGMENT);
		fs->lfs_writer = 0;
		return (error);
	}

	lfs_writevnodes(fs, mp, sp, 1);

	/*
	 * If we are doing a checkpoint, mark everything since the
	 * last checkpoint as no longer ACTIVE.
	 */
	if (do_ckp)
		for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz;
		    --ibno >= fs->lfs_cleansz; ) {
			if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize,
			    NOCRED, &bp))
				panic("lfs: ifile read");
			segusep = (SEGUSE *)bp->b_un.b_addr;
			for (i = fs->lfs_sepb; i--; segusep++)
				segusep->su_flags &= ~SEGUSE_ACTIVE;

			error = VOP_BWRITE(bp);
		}

	if (do_ckp || fs->lfs_doifile) {
redo:		vp = fs->lfs_ivnode;
		while (vget(vp));
		ip = VTOI(vp);
		if (vp->v_dirtyblkhd.le_next != NULL)
			lfs_writefile(fs, sp, vp);
		(void)lfs_writeinode(fs, sp, ip);
		vput(vp);
		if (lfs_writeseg(fs, sp) && do_ckp) {
			lfs_initseg(fs, sp);
			goto redo;
		}
	} else
		(void) lfs_writeseg(fs, sp);

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
	 * moment, the user's process hangs around so we can sleep.
	 */
	fs->lfs_writer = 0;
	fs->lfs_doifile = 0;
	wakeup(&fs->lfs_dirops);

	s = splbio();
	--fs->lfs_iocount;
	if (do_ckp) {
		if (fs->lfs_iocount && (error =
		    tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) {
			free(sp->bpp, M_SEGMENT);
			free(sp, M_SEGMENT);
			return (error);
		}
		splx(s);
		fs->lfs_nactive = 0;
		lfs_writesuper(fs, sp);
	} else
		splx(s);

	lfs_segunlock(fs);

	free(sp->bpp, M_SEGMENT);
	free(sp, M_SEGMENT);

	return (0);
}
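
/*
 * The redo: loop above is the checkpoint guarantee.  lfs_writeseg
 * returns non-zero when its update of the segment usage table dirtied
 * an ifile block that was not gathered into the segment just written,
 * so the ifile image on disk is stale.  A checkpoint must then start a
 * fresh partial segment and write the ifile again, repeating until it
 * settles; only after that does lfs_writesuper commit the superblocks.
 */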

/*
 * Write the dirty blocks associated with a vnode.
 */
void
lfs_writefile(fs, sp, vp)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
{
	struct buf *bp;
	struct finfo *fip;
	IFILE *ifp;

	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo)) {
		(void) lfs_writeseg(fs, sp);
		lfs_initseg(fs, sp);
	}
	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);
	++((SEGSUM *)(sp->segsum))->ss_nfinfo;

	fip = sp->fip;
	fip->fi_nblocks = 0;
	fip->fi_ino = VTOI(vp)->i_number;
	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
	fip->fi_version = ifp->if_version;
	brelse(bp);

	/*
	 * It may not be necessary to write the meta-data blocks at this
	 * point, as the roll-forward recovery code should be able to
	 * reconstruct the list.
	 */
	lfs_gather(fs, sp, vp, lfs_match_data);
	lfs_gather(fs, sp, vp, lfs_match_indir);
	lfs_gather(fs, sp, vp, lfs_match_dindir);
#ifdef TRIPLE
	lfs_gather(fs, sp, vp, lfs_match_tindir);
#endif

	fip = sp->fip;
#ifdef META
	printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
#endif
	if (fip->fi_nblocks != 0) {
		sp->fip =
		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
		    sizeof(daddr_t) * (fip->fi_nblocks - 1));
		sp->start_lbp = &sp->fip->fi_blocks[0];
	} else {
		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
		--((SEGSUM *)(sp->segsum))->ss_nfinfo;
	}
}
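
/*
 * FINFO space accounting, spelled out: struct finfo already contains
 * one daddr_t in its fi_blocks[] array, so a FINFO describing n blocks
 * occupies sizeof(struct finfo) + (n - 1) * sizeof(daddr_t) bytes of
 * the summary block.  That is why the charge taken on entry above is
 * sizeof(struct finfo) - sizeof(daddr_t) -- the per-block daddr_t is
 * charged one at a time in lfs_gatherblock -- and why the next FINFO
 * is placed fi_nblocks - 1 daddr_t's past the current one.
 */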

int
lfs_writeinode(fs, sp, ip)
	struct lfs *fs;
	struct segment *sp;
	struct inode *ip;
{
	struct buf *bp, *ibp;
	IFILE *ifp;
	SEGUSE *sup;
	daddr_t daddr;
	ino_t ino;
	int error, ndx;
	int redo_ifile = 0;

	if (!(ip->i_flag & (IMOD | IACC | IUPD | ICHG)))
		return (0);

	/* Allocate a new inode block if necessary. */
	if (sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < fs->lfs_bsize ||
		    sp->sum_bytes_left < sizeof(daddr_t)) {
			(void) lfs_writeseg(fs, sp);
			lfs_initseg(fs, sp);
		}

		/* Get next inode block. */
		daddr = fs->lfs_offset;
		fs->lfs_offset += fsbtodb(fs, 1);
		sp->ibp = *sp->cbpp++ =
		    lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr,
		    fs->lfs_bsize);
		++sp->start_bpp;
		fs->lfs_avail -= fsbtodb(fs, 1);
		/* Set remaining space counters. */
		sp->seg_bytes_left -= fs->lfs_bsize;
		sp->sum_bytes_left -= sizeof(daddr_t);
		ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
		    sp->ninodes / INOPB(fs) - 1;
		((daddr_t *)(sp->segsum))[ndx] = daddr;
	}

	/* Update the inode times and copy the inode onto the inode page. */
	if (ip->i_flag & IMOD)
		--fs->lfs_uinodes;
	ITIMES(ip, &time, &time);
	ip->i_flag &= ~(IMOD | IACC | IUPD | ICHG);
	bp = sp->ibp;
	bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din;
	/* Increment inode count in segment summary block. */
	++((SEGSUM *)(sp->segsum))->ss_ninos;

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address and access times for this inode in the ifile.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM) {
		daddr = fs->lfs_idaddr;
		fs->lfs_idaddr = bp->b_blkno;
	} else {
		LFS_IENTRY(ifp, fs, ino, ibp);
		daddr = ifp->if_daddr;
		ifp->if_daddr = bp->b_blkno;
		error = VOP_BWRITE(ibp);
	}

	/*
	 * No need to update segment usage if there was no former inode
	 * address or if the last inode address is in the current partial
	 * segment.
	 */
	if (daddr != LFS_UNUSED_DADDR &&
	    !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) {
		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
		if (sup->su_nbytes < sizeof(struct dinode)) {
			/* XXX -- Change to a panic. */
			printf("lfs: negative bytes (segment %d)\n",
			    datosn(fs, daddr));
			panic("negative bytes");
		}
#endif
		sup->su_nbytes -= sizeof(struct dinode);
		redo_ifile =
		    (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED));
		error = VOP_BWRITE(bp);
	}
	return (redo_ifile);
}
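
/*
 * Layout note: disk addresses of inode blocks are stored backward from
 * the end of the summary block, while FINFOs grow forward from just
 * past the SEGSUM header.  The index computed above,
 *
 *	ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - sp->ninodes / INOPB(fs) - 1
 *
 * picks the last daddr_t slot for the first inode block, the one
 * before it for the second, and so on; sum_bytes_left keeps the two
 * regions from colliding.
 */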

int
lfs_gatherblock(sp, bp, sptr)
	struct segment *sp;
	struct buf *bp;
	int *sptr;
{
	struct lfs *fs;
	int version;

	/*
	 * If full, finish this segment.  We may be doing I/O, so
	 * release and reacquire the splbio().
	 */
#ifdef DIAGNOSTIC
	if (sp->vp == NULL)
		panic("lfs_gatherblock: Null vp in segment");
#endif
	fs = sp->fs;
	if (sp->sum_bytes_left < sizeof(daddr_t) ||
	    sp->seg_bytes_left < fs->lfs_bsize) {
		if (sptr)
			splx(*sptr);
		lfs_updatemeta(sp);

		version = sp->fip->fi_version;
		(void) lfs_writeseg(fs, sp);
		lfs_initseg(fs, sp);

		sp->fip->fi_version = version;
		sp->fip->fi_ino = VTOI(sp->vp)->i_number;
		/* Add the current file to the segment summary. */
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->sum_bytes_left -=
		    sizeof(struct finfo) - sizeof(daddr_t);

		if (sptr)
			*sptr = splbio();
		return (1);
	}

	/* Insert into the buffer list, update the FINFO block. */
	bp->b_flags |= B_GATHERED;
	*sp->cbpp++ = bp;
	sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno;

	sp->sum_bytes_left -= sizeof(daddr_t);
	sp->seg_bytes_left -= bp->b_bufsize;
	return (0);
}

void
lfs_gather(fs, sp, vp, match)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int (*match) __P((struct lfs *, struct buf *));
{
	struct buf *bp;
	int s;

	sp->vp = vp;
	s = splbio();
loop:	for (bp = vp->v_dirtyblkhd.le_next; bp; bp = bp->b_vnbufs.qe_next) {
		if (bp->b_flags & B_BUSY || !match(fs, bp) ||
		    bp->b_flags & B_GATHERED)
			continue;
#ifdef DIAGNOSTIC
		if (!(bp->b_flags & B_DELWRI))
			panic("lfs_gather: bp not B_DELWRI");
		if (!(bp->b_flags & B_LOCKED))
			panic("lfs_gather: bp not B_LOCKED");
#endif
		if (lfs_gatherblock(sp, bp, &s))
			goto loop;
	}
	splx(s);
	lfs_updatemeta(sp);
	sp->vp = NULL;
}
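
/*
 * Why the goto loop after a successful lfs_gatherblock: when the
 * partial segment filled up, lfs_gatherblock dropped splbio() to do
 * the write, so the vnode's dirty-buffer list may have changed
 * underneath us.  Restarting from the head is safe because every
 * buffer already taken is marked B_GATHERED and is skipped on the
 * rescan.
 */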

/*
 * Update the metadata that points to the blocks listed in the FINFO
 * array.
 */
void
lfs_updatemeta(sp)
	struct segment *sp;
{
	SEGUSE *sup;
	struct buf *bp;
	struct lfs *fs;
	struct vnode *vp;
	struct indir a[NIADDR + 2], *ap;
	struct inode *ip;
	daddr_t daddr, lbn, off;
	int db_per_fsb, error, i, nblocks, num;

	vp = sp->vp;
	nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp;
	if (vp == NULL || nblocks == 0)
		return;

	/* Sort the blocks. */
	if (!(sp->seg_flags & SEGM_CLEAN))
		lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks);

	/*
	 * Assign disk addresses, and update references to the logical
	 * block and the segment usage information.
	 */
	fs = sp->fs;
	db_per_fsb = fsbtodb(fs, 1);
	for (i = nblocks; i--; ++sp->start_bpp) {
		lbn = *sp->start_lbp++;
		(*sp->start_bpp)->b_blkno = off = fs->lfs_offset;
		fs->lfs_offset += db_per_fsb;

		if (error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL))
			panic("lfs_updatemeta: ufs_bmaparray %d", error);
		ip = VTOI(vp);
		switch (num) {
		case 0:
			ip->i_db[lbn] = off;
			break;
		case 1:
			ip->i_ib[a[0].in_off] = off;
			break;
		default:
			ap = &a[num - 1];
			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_updatemeta: bread bno %d",
				    ap->in_lbn);
			/*
			 * Bread may create a new indirect block which needs
			 * to get counted for the inode.
			 */
			if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) {
				printf("Updatemeta allocating indirect block: shouldn't happen\n");
				ip->i_blocks += btodb(fs->lfs_bsize);
				fs->lfs_bfree -= btodb(fs->lfs_bsize);
			}
			bp->b_un.b_daddr[ap->in_off] = off;
			VOP_BWRITE(bp);
		}

		/* Update segment usage information. */
		if (daddr != UNASSIGNED) {
			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
			if (sup->su_nbytes < fs->lfs_bsize) {
				/* XXX -- Change to a panic. */
				printf("lfs: negative bytes (segment %d)\n",
				    datosn(fs, daddr));
				panic("Negative Bytes");
			}
#endif
			sup->su_nbytes -= fs->lfs_bsize;
			error = VOP_BWRITE(bp);
		}
	}
}
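
/*
 * A note on the switch above: ufs_bmaparray returns in num the length
 * of the indirect-block chain for lbn.  num == 0 is a direct block,
 * whose pointer lives in i_db[lbn]; num == 1 means the pointer lives
 * in the inode's indirect array i_ib[] (the block being relocated is
 * the root of an indirect tree); for deeper chains the pointer to
 * patch lives in the parent indirect block a[num - 1], which is read,
 * patched at in_off, and pushed back out with VOP_BWRITE.
 */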

/*
 * Start a new segment.
 */
void
lfs_initseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	SEGUSE *sup;
	SEGSUM *ssp;
	struct buf *bp;
	daddr_t lbn, *lbnp;

	/* Advance to the next segment. */
	if (!LFS_PARTIAL_FITS(fs)) {
		/* Wake up any cleaning procs waiting on this file system. */
		wakeup(&fs->lfs_nextseg);
		wakeup(&lfs_allclean_wakeup);

		lfs_newseg(fs);
		fs->lfs_offset = fs->lfs_curseg;
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;

		/*
		 * If the segment contains a superblock, update the offset
		 * and summary address to skip over it.
		 */
		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
			sp->seg_bytes_left -= LFS_SBPAD;
		}
		brelse(bp);
	} else {
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = (fs->lfs_dbpseg -
		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
	}
	fs->lfs_lastpseg = fs->lfs_offset;

	sp->fs = fs;
	sp->ibp = NULL;
	sp->ninodes = 0;

	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
	sp->cbpp = sp->bpp;
	*sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset,
	    LFS_SUMMARY_SIZE);
	sp->segsum = (*sp->cbpp)->b_un.b_addr;
	sp->start_bpp = ++sp->cbpp;
	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;

	/* Set pointer to SEGSUM, initialize it. */
	ssp = sp->segsum;
	ssp->ss_next = fs->lfs_nextseg;
	ssp->ss_nfinfo = ssp->ss_ninos = 0;

	/* Set pointer to first FINFO, initialize it. */
	sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
	sp->fip->fi_nblocks = 0;
	sp->start_lbp = &sp->fip->fi_blocks[0];

	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
}

/*
 * Return the next segment to write.
 */
void
lfs_newseg(fs)
	struct lfs *fs;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	int curseg, error, isdirty, sn;

	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
	sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
	sup->su_nbytes = 0;
	sup->su_nsums = 0;
	sup->su_ninos = 0;
	(void) VOP_BWRITE(bp);

	LFS_CLEANERINFO(cip, fs, bp);
	--cip->clean;
	++cip->dirty;
	(void) VOP_BWRITE(bp);

	fs->lfs_lastseg = fs->lfs_curseg;
	fs->lfs_curseg = fs->lfs_nextseg;
	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
		sn = (sn + 1) % fs->lfs_nseg;
		if (sn == curseg)
			panic("lfs_nextseg: no clean segments");
		LFS_SEGENTRY(sup, fs, sn, bp);
		isdirty = sup->su_flags & SEGUSE_DIRTY;
		brelse(bp);
		if (!isdirty)
			break;
	}

	++fs->lfs_nactive;
	fs->lfs_nextseg = sntoda(fs, sn);
}
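
/*
 * Segment selection above is a simple round-robin sweep: starting just
 * past the segment we are about to fill, scan the segment usage table
 * for the first entry without SEGUSE_DIRTY set and make it
 * lfs_nextseg.  The panic when the scan wraps should be unreachable in
 * practice, since lfs_segwrite refuses to start while fewer than two
 * clean segments remain and instead waits for the cleaner.
 */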

int
lfs_writeseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	extern int locked_queue_count;
	struct buf **bpp, *bp, *cbp;
	SEGUSE *sup;
	SEGSUM *ssp;
	dev_t i_dev;
	size_t size;
	u_long *datap, *dp;
	int ch_per_blk, do_again, error, i, nblocks, num, s;
	int (*strategy)__P((struct vop_strategy_args *));
	struct vop_strategy_args vop_strategy_a;
	u_short ninos;
	char *p;

	/*
	 * If there are no buffers other than the segment summary to write
	 * and it is not a checkpoint, don't do anything.  On a checkpoint,
	 * even if there aren't any buffers, you need to write the superblock.
	 */
	if ((nblocks = sp->cbpp - sp->bpp) == 1) {
		brelvp(*sp->cbpp);
		free(*sp->cbpp, M_SEGMENT);
		return (0);
	}

	ssp = (SEGSUM *)sp->segsum;

	/* Update the segment usage information. */
	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
	ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs);
	sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift;
	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
	sup->su_nbytes += LFS_SUMMARY_SIZE;
	sup->su_lastmod = time.tv_sec;
	sup->su_ninos += ninos;
	++sup->su_nsums;
	do_again = !(bp->b_flags & B_GATHERED);
	(void)VOP_BWRITE(bp);
	/*
	 * Compute checksum across data and then across summary; the first
	 * block (the summary block) is skipped.  Set the create time here
	 * so that it's guaranteed to be later than the inode mod times.
	 *
	 * XXX
	 * Fix this to do it inline, instead of malloc/copy.
	 */
	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
	for (bpp = sp->bpp, i = nblocks - 1; i--;) {
		if ((*++bpp)->b_flags & B_INVAL) {
			if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long)))
				panic("lfs_writeseg: copyin failed");
		} else
			*dp++ = (*bpp)->b_un.b_words[0];
	}
	ssp->ss_create = time.tv_sec;
	ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long));
	ssp->ss_sumsum =
	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
	free(datap, M_SEGMENT);
#ifdef DIAGNOSTIC
	if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE)
		panic("lfs_writeseg: No diskspace for summary");
#endif
	fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE);

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/*
	 * When we simply write the blocks we lose a rotation for every block
	 * written.  To avoid this problem, we allocate memory in chunks, copy
	 * the buffers into the chunk and write the chunk.  56K was chosen as
	 * some driver/controllers can't handle unsigned 16 bit transfers.
	 * When the data is copied to the chunk, turn off the B_LOCKED bit
	 * and brelse the buffer (which will move them to the LRU list).  Add
	 * the B_CALL flag to the buffer header so we can count I/O's for the
	 * checkpoints and so we can release the allocated memory.
	 *
	 * XXX
	 * This should be removed if the new virtual memory system allows us to
	 * easily make the buffers contiguous in kernel memory and if that's
	 * fast enough.
	 */
#define	LFS_CHUNKSIZE	(56 * 1024)
	ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize;
	for (bpp = sp->bpp, i = nblocks; i;) {
		num = ch_per_blk;
		if (num > i)
			num = i;
		i -= num;
		size = num * fs->lfs_bsize;

		cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp,
		    (*bpp)->b_blkno, size);
		cbp->b_dev = i_dev;
		cbp->b_flags |= B_ASYNC | B_BUSY;

		s = splbio();
		++fs->lfs_iocount;
		for (p = cbp->b_un.b_addr; num--;) {
			bp = *bpp++;
			/*
			 * Fake buffers from the cleaner are marked as B_INVAL.
			 * We need to copy the data from user space rather than
			 * from the buffer indicated.
			 * XXX == what do I do on an error?
			 */
			if (bp->b_flags & B_INVAL) {
				if (copyin(bp->b_saveaddr, p, bp->b_bcount))
					panic("lfs_writeseg: copyin failed");
			} else
				bcopy(bp->b_un.b_addr, p, bp->b_bcount);
			p += bp->b_bcount;
			if (bp->b_flags & B_LOCKED)
				--locked_queue_count;
			bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI |
			    B_LOCKED | B_GATHERED);
			if (bp->b_flags & B_CALL) {
				/* if B_CALL, it was created with newbuf */
				brelvp(bp);
				free(bp, M_SEGMENT);
			} else {
				bremfree(bp);
				bp->b_flags |= B_DONE;
				reassignbuf(bp, bp->b_vp);
				brelse(bp);
			}
		}
		++cbp->b_vp->v_numoutput;
		splx(s);
		cbp->b_bcount = p - cbp->b_un.b_addr;
		/*
		 * XXXX This is a gross and disgusting hack.  Since these
		 * buffers are physically addressed, they hang off the
		 * device vnode (devvp).  As a result, they have no way
		 * of getting to the LFS superblock or lfs structure to
		 * keep track of the number of I/O's pending.  So, I am
		 * going to stuff the fs into the saveaddr field of
		 * the buffer (yuk).
		 */
		cbp->b_saveaddr = (caddr_t)fs;
		vop_strategy_a.a_desc = VDESC(vop_strategy);
		vop_strategy_a.a_bp = cbp;
		(strategy)(&vop_strategy_a);
	}
	return (do_again);
}
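
/*
 * Aside on the checksums above: ss_datasum does not cover whole
 * blocks; it is computed over the first u_long of each data block
 * (collected into datap), which lets recovery cheaply detect a partial
 * segment whose summary reached the disk but whose data did not.
 * ss_sumsum then covers the remainder of the summary block itself,
 * starting at ss_datasum.
 */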

void
lfs_writesuper(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	struct buf *bp;
	dev_t i_dev;
	int (*strategy) __P((struct vop_strategy_args *));
	int s;
	struct vop_strategy_args vop_strategy_a;

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)];

	/* Checksum the superblock and copy it into a buffer. */
	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
	bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0],
	    LFS_SBPAD);
	*bp->b_un.b_lfs = *fs;

	/* Write the first superblock (wait). */
	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY;
	bp->b_flags &= ~(B_DONE | B_CALL | B_ERROR | B_READ | B_DELWRI);
	vop_strategy_a.a_desc = VDESC(vop_strategy);
	vop_strategy_a.a_bp = bp;
	s = splbio();
	bp->b_vp->v_numoutput += 2;
	splx(s);
	(strategy)(&vop_strategy_a);
	biowait(bp);

	/* Write the second superblock (don't wait). */
	bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
	bp->b_flags |= B_CALL | B_ASYNC | B_BUSY;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	bp->b_iodone = lfs_supercallback;
	(strategy)(&vop_strategy_a);
}

/*
 * Logical block number match routines used when traversing the dirty block
 * chain.
 */
int
lfs_match_data(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	return (bp->b_lblkno >= 0);
}

int
lfs_match_indir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
}

int
lfs_match_dindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
}

int
lfs_match_tindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
}

/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(vp, daddr, size)
	struct vnode *vp;
	daddr_t daddr;
	size_t size;
{
	struct buf *bp;
	size_t nbytes;

	nbytes = roundup(size, DEV_BSIZE);
	bp = malloc(sizeof(struct buf) + nbytes, M_SEGMENT, M_WAITOK);
	bzero(bp, sizeof(struct buf) + nbytes);
	bgetvp(vp, bp);
	bp->b_un.b_addr = (caddr_t)(bp + 1);
	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	bp->b_iodone = lfs_callback;
	bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE;
	return (bp);
}

void
lfs_callback(bp)
	struct buf *bp;
{
	struct lfs *fs;

	fs = (struct lfs *)bp->b_saveaddr;
#ifdef DIAGNOSTIC
	if (fs->lfs_iocount == 0)
		panic("lfs_callback: zero iocount");
#endif
	if (--fs->lfs_iocount == 0)
		wakeup(&fs->lfs_iocount);

	brelvp(bp);
	free(bp, M_SEGMENT);
}

void
lfs_supercallback(bp)
	struct buf *bp;
{
	brelvp(bp);
	free(bp, M_SEGMENT);
}

/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcroft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  Note that we cast the array
 * of logical block numbers to an unsigned in this routine so that the
 * negative block numbers (meta data blocks) sort AFTER the data blocks.
 */
void
lfs_shellsort(bp_array, lb_array, nmemb)
	struct buf **bp_array;
	daddr_t *lb_array;
	register int nmemb;
{
	static int __rsshell_increments[] = { 4, 1, 0 };
	register int incr, *incrp, t1, t2;
	struct buf *bp_temp;
	u_long lb_temp;

	for (incrp = __rsshell_increments; incr = *incrp++;)
		for (t1 = incr; t1 < nmemb; ++t1)
			for (t2 = t1 - incr; t2 >= 0;)
				if (lb_array[t2] > lb_array[t2 + incr]) {
					lb_temp = lb_array[t2];
					lb_array[t2] = lb_array[t2 + incr];
					lb_array[t2 + incr] = lb_temp;
					bp_temp = bp_array[t2];
					bp_array[t2] = bp_array[t2 + incr];
					bp_array[t2 + incr] = bp_temp;
					t2 -= incr;
				} else
					break;
}
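
/*
 * A worked example of the intended ordering: with NDADDR of 12, a
 * file's single-indirect root has lbn -12, which as a 32-bit unsigned
 * value is 0xfffffff4 and so compares greater than any non-negative
 * data-block number.  Note, though, that only the temporary lb_temp is
 * actually unsigned here; the comparison itself is done on the signed
 * daddr_t array, so the AFTER ordering evidently also relies on the
 * callers never mixing data and meta data blocks in one sort
 * (lfs_gather runs once per match routine).
 */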