/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_segment.c	7.19 (Berkeley) 05/15/92
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/specdev.h>
#include <sys/fifo.h>
#include <sys/malloc.h>
#include <sys/mount.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

/*
 * In-memory description of a segment about to be written.  One of these is
 * allocated per segment write (lfs_vflush/lfs_segwrite) and threaded through
 * lfs_initseg, lfs_gather, lfs_writefile, lfs_writeinode and lfs_writeseg.
 */
struct segment {
	struct buf	**bpp;		/* pointer to buffer array; bpp[0] is the summary block */
	struct buf	**cbpp;		/* pointer to next available bp slot in bpp */
	struct buf	*ibp;		/* buffer pointer to current (partially full) inode page */
	struct finfo	*fip;		/* current fileinfo pointer within the summary block */
	void	*segsum;		/* segment summary info (SEGSUM at start of bpp[0]) */
	u_long	ninodes;		/* number of inodes written into this segment so far */
	u_long	seg_bytes_left;		/* bytes left in segment */
	u_long	sum_bytes_left;		/* bytes left in summary block */
	u_long	seg_number;		/* number of this segment */
#define SEGM_CKP	0x01		/* doing a checkpoint */
	u_long	seg_flags;		/* run-time flags for this segment */
};

/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.
52 */ 53 #define LFS_PARTIAL_FITS(fs) \ 54 ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \ 55 1 << (fs)->lfs_fsbtodb) 56 57 void lfs_callback __P((struct buf *)); 58 void lfs_gather __P((struct lfs *, struct segment *, 59 struct vnode *, int (*) __P((struct lfs *, struct buf *)))); 60 void lfs_initseg __P((struct lfs *, struct segment *)); 61 void lfs_iset __P((struct inode *, daddr_t, time_t)); 62 int lfs_match_data __P((struct lfs *, struct buf *)); 63 int lfs_match_dindir __P((struct lfs *, struct buf *)); 64 int lfs_match_indir __P((struct lfs *, struct buf *)); 65 int lfs_match_tindir __P((struct lfs *, struct buf *)); 66 struct buf * 67 lfs_newbuf __P((struct lfs *, daddr_t, size_t)); 68 void lfs_newseg __P((struct lfs *)); 69 void lfs_shellsort __P((struct buf **, daddr_t *, register int)); 70 void lfs_updatemeta __P((struct lfs *, 71 struct segment *, struct vnode *, daddr_t *, struct buf **, int)); 72 void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *)); 73 void lfs_writeinode __P((struct lfs *, struct segment *, struct inode *)); 74 void lfs_writeseg __P((struct lfs *, struct segment *)); 75 void lfs_writesuper __P((struct lfs *, struct segment *)); 76 77 int lfs_allclean_wakeup; /* Cleaner wakeup address. */ 78 79 /* 80 * Ifile and meta data blocks are not marked busy, so segment writes MUST be 81 * single threaded. Currently, there are two paths into lfs_segwrite, sync() 82 * and getnewbuf(). They both mark the file system busy. Lfs_vflush() 83 * explicitly marks the file system busy. So lfs_segwrite is safe. I think. 84 */ 85 86 int 87 lfs_vflush(vp) 88 struct vnode *vp; 89 { 90 struct inode *ip; 91 struct lfs *fs; 92 struct mount *mp; 93 struct segment *sp; 94 int error, s; 95 96 #ifdef VERBOSE 97 printf("lfs_vflush\n"); 98 #endif 99 mp = vp->v_mount; 100 fs = VFSTOUFS(mp)->um_lfs; 101 102 /* 103 * XXX 104 * check flags? 
105 * mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY) || 106 */ 107 if (vfs_busy(mp)) 108 return (0); 109 110 /* 111 * Allocate a segment structure and enough space to hold pointers to 112 * the maximum possible number of buffers which can be described in a 113 * single summary block. 114 */ 115 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 116 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 117 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 118 sp->seg_flags = SEGM_CKP; 119 lfs_initseg(fs, sp); 120 121 /* 122 * Keep a cumulative count of the outstanding I/O operations. If the 123 * disk drive catches up with us it could go to zero before we finish, 124 * so we artificially increment it by one until we've scheduled all of 125 * the writes we intend to do. 126 */ 127 s = splbio(); 128 ++fs->lfs_iocount; 129 splx(s); 130 131 if (vp->v_dirtyblkhd != NULL) 132 lfs_writefile(fs, sp, vp); 133 ip = VTOI(vp); 134 lfs_writeinode(fs, sp, ip); 135 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 136 137 lfs_writeseg(fs, sp); 138 139 /* 140 * If the I/O count is non-zero, sleep until it reaches zero. At the 141 * moment, the user's process hangs around so we can sleep. 142 */ 143 s = splbio(); 144 if (--fs->lfs_iocount && (error = 145 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) { 146 free(sp->bpp, M_SEGMENT); 147 free(sp, M_SEGMENT); 148 return (error); 149 } 150 splx(s); 151 vfs_unbusy(mp); 152 153 /* 154 * XXX 155 * Should be writing a checkpoint? 156 */ 157 free(sp->bpp, M_SEGMENT); 158 free(sp, M_SEGMENT); 159 160 return (0); 161 } 162 163 int 164 lfs_segwrite(mp, do_ckp) 165 struct mount *mp; 166 int do_ckp; /* Do a checkpoint. 
*/ 167 { 168 USES_VOP_ISLOCKED; 169 struct inode *ip; 170 struct lfs *fs; 171 struct segment *sp; 172 struct vnode *vp; 173 int error, islocked, s; 174 175 #ifdef VERBOSE 176 printf("lfs_segwrite\n"); 177 #endif 178 fs = VFSTOUFS(mp)->um_lfs; 179 180 /* 181 * Allocate a segment structure and enough space to hold pointers to 182 * the maximum possible number of buffers which can be described in a 183 * single summary block. 184 */ 185 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 186 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 187 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 188 sp->seg_flags = do_ckp ? SEGM_CKP : 0; 189 lfs_initseg(fs, sp); 190 191 /* 192 * Keep a cumulative count of the outstanding I/O operations. If the 193 * disk drive catches up with us it could go to zero before we finish, 194 * so we artificially increment it by one until we've scheduled all of 195 * the writes we intend to do. If not a checkpoint, we never do the 196 * final decrement, avoiding the wakeup in the callback routine. 197 */ 198 s = splbio(); 199 ++fs->lfs_iocount; 200 splx(s); 201 202 loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 203 /* 204 * If the vnode that we are about to sync is no longer 205 * associated with this mount point, start over. 206 */ 207 if (vp->v_mount != mp) 208 goto loop; 209 210 islocked = VOP_ISLOCKED(vp); 211 212 /* 213 * XXX 214 * This is wrong, I think -- we should just wait until we 215 * get the vnode and go on. Probably going to reschedule 216 * all of the writes we already scheduled... 217 */ 218 if (islocked) 219 VREF(vp); 220 else if (vget(vp)) 221 { 222 printf("lfs_segment: failed to get vnode (tell Keith)!\n"); 223 goto loop; 224 } 225 /* 226 * Write the inode/file if dirty and it's not the 227 * the IFILE. 
228 */ 229 ip = VTOI(vp); 230 if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) || 231 vp->v_dirtyblkhd != NULL) && 232 ip->i_number != LFS_IFILE_INUM) { 233 if (vp->v_dirtyblkhd != NULL) 234 lfs_writefile(fs, sp, vp); 235 lfs_writeinode(fs, sp, ip); 236 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 237 } 238 if (islocked) 239 vrele(vp); 240 else 241 vput(vp); 242 } 243 if (do_ckp) { 244 vp = fs->lfs_ivnode; 245 while (vget(vp)); 246 ip = VTOI(vp); 247 if (vp->v_dirtyblkhd != NULL) 248 lfs_writefile(fs, sp, vp); 249 lfs_writeinode(fs, sp, ip); 250 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 251 vput(vp); 252 } 253 lfs_writeseg(fs, sp); 254 255 /* 256 * If the I/O count is non-zero, sleep until it reaches zero. At the 257 * moment, the user's process hangs around so we can sleep. 258 */ 259 s = splbio(); 260 --fs->lfs_iocount; 261 if (do_ckp) { 262 if (fs->lfs_iocount && (error = 263 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) { 264 free(sp->bpp, M_SEGMENT); 265 free(sp, M_SEGMENT); 266 return (error); 267 } 268 splx(s); 269 lfs_writesuper(fs, sp); 270 } else 271 splx(s); 272 273 free(sp->bpp, M_SEGMENT); 274 free(sp, M_SEGMENT); 275 276 return (0); 277 } 278 279 /* 280 * Write the dirty blocks associated with a vnode. 
 */
void
lfs_writefile(fs, sp, vp)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
{
	struct buf *bp;
	struct finfo *fip;
	IFILE *ifp;

#ifdef VERBOSE
	printf("lfs_writefile\n");
#endif
	/*
	 * If this file's FINFO header won't fit in the current segment or
	 * summary block, push the segment to disk and start a new one.
	 */
	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo)) {
		lfs_writeseg(fs, sp);
		lfs_initseg(fs, sp);
	}
	/*
	 * Reserve summary space for the FINFO header only; fi_blocks[0] is
	 * accounted per-block by lfs_gather, hence the sizeof(daddr_t) credit.
	 */
	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);

	/* Start a new FINFO entry; version comes from the ifile entry. */
	fip = sp->fip;
	fip->fi_nblocks = 0;
	fip->fi_ino = VTOI(vp)->i_number;
	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
	fip->fi_version = ifp->if_version;
	brelse(bp);

	/*
	 * It may not be necessary to write the meta-data blocks at this point,
	 * as the roll-forward recovery code should be able to reconstruct the
	 * list.
	 */
	lfs_gather(fs, sp, vp, lfs_match_data);
	lfs_gather(fs, sp, vp, lfs_match_indir);
	lfs_gather(fs, sp, vp, lfs_match_dindir);
#ifdef TRIPLE
	lfs_gather(fs, sp, vp, lfs_match_tindir);
#endif

	/* Reload fip: lfs_gather may have started a new segment/summary. */
	fip = sp->fip;
#ifdef META
	printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
#endif
	if (fip->fi_nblocks != 0) {
		/* Commit the FINFO and advance fip past its block array. */
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->fip =
		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
		    sizeof(daddr_t) * (fip->fi_nblocks - 1));
	} else
		/* No blocks gathered: give back the reserved summary space. */
		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
}

/*
 * Append the inode to the segment's current inode block, allocating a new
 * inode block (and possibly a new segment) as needed, and update the ifile
 * entry and segment-usage accounting for the inode's old location.
 */
void
lfs_writeinode(fs, sp, ip)
	struct lfs *fs;
	struct segment *sp;
	struct inode *ip;
{
	struct buf *bp, *ibp;
	IFILE *ifp;
	SEGUSE *sup;
	daddr_t daddr;
	ino_t ino;
	int ndx;

#ifdef VERBOSE
	printf("lfs_writeinode\n");
#endif
	/* Allocate a new inode block if necessary. */
	if (sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < fs->lfs_bsize ||
		    sp->sum_bytes_left < sizeof(daddr_t)) {
			lfs_writeseg(fs, sp);
			lfs_initseg(fs, sp);
		}

		/* Get next inode block. */
		daddr = fs->lfs_offset;
		fs->lfs_offset += fsbtodb(fs, 1);
		sp->ibp = *sp->cbpp++ =
		    lfs_newbuf(fs, daddr, fs->lfs_bsize);

		/*
		 * Set remaining space counters.  Inode-block addresses are
		 * recorded from the END of the summary block backwards.
		 */
		sp->seg_bytes_left -= fs->lfs_bsize;
		sp->sum_bytes_left -= sizeof(daddr_t);
		ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
		    sp->ninodes / INOPB(fs) - 1;
		((daddr_t *)(sp->segsum))[ndx] = daddr;
	}

	/* Update the inode times and copy the inode onto the inode page. */
	ITIMES(ip, &time, &time);
	bp = sp->ibp;
	bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din;

	/* Increment inode count in segment summary block. */
	++((SEGSUM *)(sp->segsum))->ss_ninos;

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address and access times for this inode in the ifile.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM)
		fs->lfs_idaddr = bp->b_blkno;

	LFS_IENTRY(ifp, fs, ino, ibp);
	daddr = ifp->if_daddr;
	ifp->if_daddr = bp->b_blkno;
	LFS_UBWRITE(ibp);

	/*
	 * The inode's previous copy (if any) no longer counts toward the
	 * live bytes of the segment it used to live in.
	 */
	if (daddr != LFS_UNUSED_DADDR) {
		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
		if (sup->su_nbytes < sizeof(struct dinode))
			/* XXX -- Change to a panic. */
			printf("lfs: negative bytes (segment %d)\n",
			    datosn(fs, daddr));
#endif
		sup->su_nbytes -= sizeof(struct dinode);
		LFS_UBWRITE(bp);
	}
}

/*
 * Walk the vnode's dirty-block list, collecting every buffer accepted by
 * the match function into the segment's buffer array and the current FINFO,
 * starting a new segment when the current one fills.  The dirty list is
 * traversed at splbio(); the SPL is dropped and the scan restarted whenever
 * a segment boundary forces I/O.
 */
void
lfs_gather(fs, sp, vp, match)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int (*match) __P((struct lfs *, struct buf *));
{
	struct buf **bpp, *bp, *nbp;
	struct finfo *fip;
	struct inode *ip;
	daddr_t *lbp, *start_lbp;
	u_long version;
	int s;

#ifdef VERBOSE
	printf("lfs_gather\n");
#endif
	ip = VTOI(vp);
	bpp = sp->cbpp;
	fip = sp->fip;
	start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks];

loop:	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		/*
		 * XXX
		 * Should sleep on any BUSY buffer if doing an fsync?
		 */
		if (bp->b_flags & B_BUSY || !match(fs, bp))
			continue;
#ifdef DIAGNOSTIC
		if (!(bp->b_flags & B_DELWRI))
			panic("lfs_gather: bp not B_DELWRI");
		if (!(bp->b_flags & B_LOCKED))
			panic("lfs_gather: bp not B_LOCKED");
#endif
		/*
		 * If full, finish this segment.  We may be doing I/O, so
		 * release and reacquire the splbio().
		 */
		if (sp->sum_bytes_left < sizeof(daddr_t) ||
		    sp->seg_bytes_left < fs->lfs_bsize) {
			splx(s);
			lfs_updatemeta(fs,
			    sp, vp, start_lbp, bpp, lbp - start_lbp);

			/* Add the current file to the segment summary. */
			++((SEGSUM *)(sp->segsum))->ss_nfinfo;

			/* Carry the FINFO identity over to the new segment. */
			version = fip->fi_version;
			lfs_writeseg(fs, sp);
			lfs_initseg(fs, sp);

			fip = sp->fip;
			fip->fi_version = version;
			fip->fi_ino = ip->i_number;
			start_lbp = lbp = fip->fi_blocks;

			sp->sum_bytes_left -=
			    sizeof(struct finfo) - sizeof(daddr_t);

			/* Restart the scan; the dirty list may have changed. */
			bpp = sp->cbpp;
			goto loop;
		}

		/* Insert into the buffer list, update the FINFO block.
 */
		*sp->cbpp++ = bp;
		++fip->fi_nblocks;
		*lbp++ = bp->b_lblkno;

		sp->sum_bytes_left -= sizeof(daddr_t);
		sp->seg_bytes_left -= bp->b_bufsize;
	}
	splx(s);
	/* Assign disk addresses to everything gathered since the last flush. */
	lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp);
}

/*
 * Update the metadata that points to the blocks listed in the FINFO
 * array.
 */
void
lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	daddr_t *lbp;
	struct buf **bpp;
	int nblocks;
{
	USES_VOP_BWRITE;
	SEGUSE *sup;
	struct buf *bp;
	INDIR a[NIADDR], *ap;
	struct inode *ip;
	daddr_t daddr, lbn, off;
	int db_per_fsb, error, i, num;

#ifdef VERBOSE
	printf("lfs_updatemeta\n");
#endif
	if (nblocks == 0)
		return;

	/* Sort the blocks (bpp and lbp are kept in step). */
	lfs_shellsort(bpp, lbp, nblocks);

	/*
	 * Assign disk addresses, and update references to the logical
	 * block and the segment usage information.
	 */
	db_per_fsb = fsbtodb(fs, 1);
	for (i = nblocks; i--; ++bpp) {
		lbn = *lbp++;
		(*bpp)->b_blkno = off = fs->lfs_offset;
		fs->lfs_offset += db_per_fsb;

		/* daddr receives the block's OLD address for accounting. */
		if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num))
			panic("lfs_updatemeta: lfs_bmaparray %d", error);
		ip = VTOI(vp);
		/*
		 * num is the depth of indirection: 0 = direct block,
		 * 1 = pointed to by an inode indirect slot, otherwise the
		 * parent is an indirect block that must be read and updated.
		 */
		switch (num) {
		case 0:
			ip->i_db[lbn] = off;
			break;
		case 1:
			ip->i_ib[a[0].in_off] = off;
			break;
		default:
			ap = &a[num - 1];
			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_updatemeta: bread bno %d",
				    ap->in_lbn);
			bp->b_un.b_daddr[ap->in_off] = off;
			VOP_BWRITE(bp);
		}

		/* Update segment usage information. */
		if (daddr != UNASSIGNED) {
			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
			if (sup->su_nbytes < fs->lfs_bsize)
				/* XXX -- Change to a panic. */
				printf("lfs: negative bytes (segment %d)\n",
				    datosn(fs, daddr));
#endif
			sup->su_nbytes -= fs->lfs_bsize;
			LFS_UBWRITE(bp);
		}
	}
}

/*
 * Start a new segment.
 */
void
lfs_initseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	SEGUSE *sup;
	SEGSUM *ssp;
	struct buf *bp;
	daddr_t lbn, *lbnp;

#ifdef VERBOSE
	printf("lfs_initseg\n");
#endif
	/* Advance to the next segment. */
	if (!LFS_PARTIAL_FITS(fs)) {
		/* Wake up any cleaning procs waiting on this file system. */
		wakeup(&fs->lfs_nextseg);
		wakeup(&lfs_allclean_wakeup);

		lfs_newseg(fs);
		fs->lfs_offset = fs->lfs_curseg;
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;

		/*
		 * If the segment contains a superblock, update the offset
		 * and summary address to skip over it.
		 */
		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
			sp->seg_bytes_left -= LFS_SBPAD;
		}
		brelse(bp);
	} else {
		/* A partial fits: continue writing into the current segment. */
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = (fs->lfs_dbpseg -
		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
	}

	sp->ibp = NULL;
	sp->ninodes = 0;

	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
	sp->cbpp = sp->bpp;
	*sp->cbpp = lfs_newbuf(fs, fs->lfs_offset, LFS_SUMMARY_SIZE);
	sp->segsum = (*sp->cbpp)->b_un.b_addr;
	++sp->cbpp;
	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;

	/* Set point to SEGSUM, initialize it. */
	ssp = sp->segsum;
	ssp->ss_next = fs->lfs_nextseg;
	ssp->ss_nfinfo = ssp->ss_ninos = 0;

	/* Set pointer to first FINFO, initialize it.
*/ 622 sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM)); 623 sp->fip->fi_nblocks = 0; 624 625 sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 626 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 627 } 628 629 /* 630 * Return the next segment to write. 631 */ 632 void 633 lfs_newseg(fs) 634 struct lfs *fs; 635 { 636 CLEANERINFO *cip; 637 SEGUSE *sup; 638 struct buf *bp; 639 int curseg, isdirty, sn; 640 641 #ifdef VERBOSE 642 printf("lfs_newseg\n"); 643 #endif 644 /* 645 * Turn off the active bit for the current segment, turn on the 646 * active and dirty bits for the next segment, update the cleaner 647 * info. Set the current segment to the next segment, get a new 648 * next segment. 649 */ 650 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp); 651 sup->su_flags &= ~SEGUSE_ACTIVE; 652 LFS_UBWRITE(bp); 653 654 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp); 655 sup->su_flags |= SEGUSE_ACTIVE | SEGUSE_DIRTY; 656 LFS_UBWRITE(bp); 657 658 LFS_CLEANERINFO(cip, fs, bp); 659 --cip->clean; 660 ++cip->dirty; 661 LFS_UBWRITE(bp); 662 663 fs->lfs_lastseg = fs->lfs_curseg; 664 fs->lfs_curseg = fs->lfs_nextseg; 665 for (sn = curseg = datosn(fs, fs->lfs_curseg);;) { 666 sn = (sn + 1) % fs->lfs_nseg; 667 if (sn == curseg) 668 panic("lfs_nextseg: no clean segments"); 669 LFS_SEGENTRY(sup, fs, sn, bp); 670 isdirty = sup->su_flags & SEGUSE_DIRTY; 671 brelse(bp); 672 if (!isdirty) 673 break; 674 } 675 fs->lfs_nextseg = sntoda(fs, sn); 676 } 677 678 void 679 lfs_writeseg(fs, sp) 680 struct lfs *fs; 681 struct segment *sp; 682 { 683 USES_VOP_STRATEGY; 684 struct buf **bpp, *bp, *cbp; 685 SEGUSE *sup; 686 SEGSUM *ssp; 687 dev_t i_dev; 688 u_long *datap, *dp; 689 size_t size; 690 int ch_per_blk, i, nblocks, num, s, (*strategy)__P((struct vop_strategy_args *)); 691 char *p; 692 693 #ifdef VERBOSE 694 printf("lfs_writeseg\n"); 695 #endif 696 if ((nblocks = sp->cbpp - sp->bpp) == 0) 697 return; 698 699 /* 700 * Compute checksum across data and then across summary; the 
first 701 * block (the summary block) is skipped. Set the create time here 702 * so that it's guaranteed to be later than the inode mod times. 703 * 704 * XXX 705 * Fix this to do it inline, instead of malloc/copy. 706 */ 707 datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); 708 for (bpp = sp->bpp, i = nblocks - 1; i--;) 709 *dp++ = (*++bpp)->b_un.b_words[0]; 710 ssp = (SEGSUM *)sp->segsum; 711 ssp->ss_create = time.tv_sec; 712 ssp->ss_datasum = cksum(datap, nblocks * sizeof(u_long)); 713 ssp->ss_sumsum = 714 cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum)); 715 free(datap, M_SEGMENT); 716 717 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 718 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 719 720 /* 721 * When we simply write the blocks we lose a rotation for every block 722 * written. To avoid this problem, we allocate memory in chunks, copy 723 * the buffers into the chunk and write the chunk. 56K was chosen as 724 * some driver/controllers can't handle unsigned 16 bit transfers. 725 * When the data is copied to the chunk, turn off the the B_LOCKED bit 726 * and brelse the buffer (which will move them to the LRU list). Add 727 * the B_CALL flag to the buffer header so we can count I/O's for the 728 * checkpoints and so we can release the allocated memory. 729 * 730 * XXX 731 * This should be removed if the new virtual memory system allows us to 732 * easily make the buffers contiguous in kernel memory and if that's 733 * fast enough. 
734 */ 735 #define LFS_CHUNKSIZE (56 * 1024) 736 ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize; 737 for (bpp = sp->bpp, i = nblocks; i;) { 738 num = ch_per_blk; 739 if (num > i) 740 num = i; 741 i -= num; 742 size = num * fs->lfs_bsize; 743 744 cbp = lfs_newbuf(fs, (*bpp)->b_blkno, 0); 745 cbp->b_dev = i_dev; 746 cbp->b_flags = B_ASYNC | B_BUSY | B_CALL; 747 cbp->b_iodone = lfs_callback; 748 cbp->b_saveaddr = cbp->b_un.b_addr; 749 cbp->b_un.b_addr = malloc(size, M_SEGMENT, M_WAITOK); 750 751 s = splbio(); 752 ++fs->lfs_iocount; 753 for (p = cbp->b_un.b_addr; num--;) { 754 bp = *bpp++; 755 bcopy(bp->b_un.b_addr, p, bp->b_bcount); 756 p += bp->b_bcount; 757 bp->b_flags &= 758 ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED); 759 if (!(bp->b_flags & B_NOCACHE)) { 760 bremfree(bp); 761 reassignbuf(bp, bp->b_vp); 762 } 763 brelse(bp); 764 } 765 splx(s); 766 cbp->b_bcount = p - cbp->b_un.b_addr; 767 vop_strategy_a.a_desc = VDESC(vop_strategy); 768 vop_strategy_a.a_bp = cbp; 769 (strategy)(&vop_strategy_a); 770 } 771 772 /* Update the segment usage information. */ 773 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 774 sup->su_nbytes += nblocks - 1 - 775 (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs) << fs->lfs_bshift; 776 sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode); 777 sup->su_lastmod = time.tv_sec; 778 LFS_UBWRITE(bp); 779 } 780 781 void 782 lfs_writesuper(fs, sp) 783 struct lfs *fs; 784 struct segment *sp; 785 { 786 USES_VOP_STRATEGY; 787 struct buf *bp; 788 dev_t i_dev; 789 int (*strategy) __P((struct vop_strategy_args *)); 790 791 #ifdef VERBOSE 792 printf("lfs_writesuper\n"); 793 #endif 794 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 795 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 796 797 /* Checksum the superblock and copy it into a buffer. 
 */
	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
	bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD);
	*bp->b_un.b_lfs = *fs;

	/* Write the first superblock (wait). */
	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	vop_strategy_a.a_desc = VDESC(vop_strategy);
	vop_strategy_a.a_bp = bp;
	(strategy)(&vop_strategy_a);
	biowait(bp);

	/* Write the second superblock (don't wait). */
	bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
	bp->b_flags |= B_ASYNC | B_BUSY;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	(strategy)(&vop_strategy_a);
}

/*
 * Logical block number match routines used when traversing the dirty block
 * chain.  Data blocks have non-negative logical block numbers; indirect
 * blocks are encoded as negative lbns, distinguished below by their offset
 * modulo NINDIR(fs).
 */

/* Match direct data blocks (lbn >= 0). */
int
lfs_match_data(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	return (bp->b_lblkno >= 0);
}

/* Match single-indirect blocks (negative lbn, residue 0). */
int
lfs_match_indir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
}

/* Match double-indirect blocks (negative lbn, residue 1). */
int
lfs_match_dindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
}

/* Match triple-indirect blocks (negative lbn, residue 2). */
int
lfs_match_tindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
}

/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(fs, daddr, size)
	struct lfs *fs;
	daddr_t daddr;
	size_t size;
{
	struct buf *bp;

#ifdef VERBOSE
	printf("lfs_newbuf\n");
#endif
	/*
	 * Take a buffer off the free list, detach it from its old identity
	 * and associate it with the ifile vnode at the given disk address.
	 * B_NOCACHE marks it as LFS-private so lfs_writeseg won't try to
	 * re-enter it into the regular cache lists.
	 */
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(fs->lfs_ivnode, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	/* size == 0 callers (chunk buffers) supply their own data area. */
	if (size)
		allocbuf(bp, size);
	bp->b_flags |= B_NOCACHE;
	bp->b_saveaddr = NULL;
	binshash(bp, &bfreelist[BQ_AGE]);
	return (bp);
}

/*
 * I/O completion callback for segment chunk writes: decrement the
 * outstanding-I/O count (waking any checkpoint waiter when it hits zero)
 * and release the chunk's malloc'd data area.
 */
void
lfs_callback(bp)
	struct buf *bp;
{
	struct lfs *fs;

	fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
#ifdef DIAGNOSTIC
	if (fs->lfs_iocount == 0)
		panic("lfs_callback: zero iocount\n");
#endif
	if (--fs->lfs_iocount == 0)
		wakeup(&fs->lfs_iocount);

	/* b_saveaddr non-NULL means lfs_writeseg substituted a chunk buffer. */
	if (bp->b_saveaddr) {
		free(bp->b_un.b_addr, M_SEGMENT);
		bp->b_un.b_addr = bp->b_saveaddr;
		bp->b_saveaddr = NULL;
	}
	brelse(bp);
}

/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  Note that we cast the array
 * of logical block numbers to a unsigned in this routine so that the
 * negative block numbers (meta data blocks) sort AFTER the data blocks.
927 */ 928 void 929 lfs_shellsort(bp_array, lb_array, nmemb) 930 struct buf **bp_array; 931 daddr_t *lb_array; 932 register int nmemb; 933 { 934 static int __rsshell_increments[] = { 4, 1, 0 }; 935 register int incr, *incrp, t1, t2; 936 struct buf *bp_temp; 937 u_long lb_temp; 938 939 for (incrp = __rsshell_increments; incr = *incrp++;) 940 for (t1 = incr; t1 < nmemb; ++t1) 941 for (t2 = t1 - incr; t2 >= 0;) 942 if (lb_array[t2] > lb_array[t2 + incr]) { 943 lb_temp = lb_array[t2]; 944 lb_array[t2] = lb_array[t2 + incr]; 945 lb_array[t2 + incr] = lb_temp; 946 bp_temp = bp_array[t2]; 947 bp_array[t2] = bp_array[t2 + incr]; 948 bp_array[t2 + incr] = bp_temp; 949 t2 -= incr; 950 } else 951 break; 952 } 953