/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_segment.c	7.15 (Berkeley) 03/18/92
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/specdev.h>
#include <sys/fifo.h>
#include <sys/malloc.h>
#include <sys/mount.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

/*
 * In-memory description of a segment about to be written.  The buffer
 * array bpp holds, in disk order, the summary block followed by every
 * data/inode block scheduled for this partial segment; cbpp is the
 * cursor into it.  The space counters are decremented as blocks and
 * FINFO/inode entries are added, and drive the decision to close this
 * partial and start a new one.
 */
struct segment {
	struct buf	**bpp;		/* pointer to buffer array */
	struct buf	**cbpp;		/* pointer to next available bp */
	struct buf	*ibp;		/* buffer pointer to inode page */
	struct finfo	*fip;		/* current fileinfo pointer */
	void	*segsum;		/* segment summary info */
	u_long	ninodes;		/* number of inodes in this segment */
	u_long	seg_bytes_left;		/* bytes left in segment */
	u_long	sum_bytes_left;		/* bytes left in summary block */
	u_long	seg_number;		/* number of this segment */
#define	SEGM_CKP	0x01		/* doing a checkpoint */
	u_long	seg_flags;		/* run-time flags for this segment */
};

/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)

int	 lfs_callback __P((struct buf *));
void	 lfs_gather __P((struct lfs *, struct segment *,
	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
void	 lfs_initseg __P((struct lfs *, struct segment *));
void	 lfs_iset __P((struct inode *, daddr_t, time_t));
int	 lfs_match_data __P((struct lfs *, struct buf *));
int	 lfs_match_dindir __P((struct lfs *, struct buf *));
int	 lfs_match_indir __P((struct lfs *, struct buf *));
int	 lfs_match_tindir __P((struct lfs *, struct buf *));
struct buf *
	 lfs_newbuf __P((struct lfs *, daddr_t, size_t));
void	 lfs_newseg __P((struct lfs *));
void	 lfs_shellsort __P((struct buf **, daddr_t *, register int));
void	 lfs_updatemeta __P((struct lfs *,
	     struct segment *, struct vnode *, daddr_t *, struct buf **, int));
void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
void	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
void	 lfs_writeseg __P((struct lfs *, struct segment *));
void	 lfs_writesuper __P((struct lfs *, struct segment *));

int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */

/*
 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
 * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
 * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
 * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
 */

/*
 * Write the dirty blocks and the inode of a single vnode as a private,
 * checkpoint-style partial segment.  Returns 0 on success, or the error
 * from tsleep() while waiting for the scheduled I/O to drain.
 */
int
lfs_vflush(vp)
	struct vnode *vp;
{
	struct inode *ip;
	struct lfs *fs;
	struct mount *mp;
	struct segment *sp;
	int error, s;

#ifdef VERBOSE
	printf("lfs_vflush\n");
#endif
	mp = vp->v_mount;
	fs = VFSTOUFS(mp)->um_lfs;

	/*
	 * XXX
	 * check flags?
	 * mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY) ||
	 */
	/* If someone else holds the file system busy, the flush is skipped. */
	if (vfs_busy(mp))
		return (0);

	/*
	 * Allocate a segment structure and enough space to hold pointers to
	 * the maximum possible number of buffers which can be described in a
	 * single summary block.
	 */
	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
	sp->seg_flags = SEGM_CKP;
	lfs_initseg(fs, sp);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	s = splbio();
	++fs->lfs_iocount;
	splx(s);

	if (vp->v_dirtyblkhd != NULL)
		lfs_writefile(fs, sp, vp);
	ip = VTOI(vp);
	lfs_writeinode(fs, sp, ip);
	/*
	 * NOTE(review): the dirty-inode test in lfs_segwrite reads
	 * ip->i_flag, but this clears ip->i_flags -- looks like a typo for
	 * i_flag; verify against struct inode's field names.
	 */
	ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);

	lfs_writeseg(fs, sp);

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
	 * moment, the user's process hangs around so we can sleep.
	 */
	s = splbio();
	if (--fs->lfs_iocount && (error =
	    tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) {
		/*
		 * NOTE(review): this error return leaves the processor at
		 * splbio() and the file system marked busy (no splx(s), no
		 * vfs_unbusy(mp)) -- confirm whether that is intentional.
		 */
		free(sp->bpp, M_SEGMENT);
		free(sp, M_SEGMENT);
		return (error);
	}
	splx(s);
	vfs_unbusy(mp);

	/*
	 * XXX
	 * Should be writing a checkpoint?
	 */
	free(sp->bpp, M_SEGMENT);
	free(sp, M_SEGMENT);

	return (0);
}

/*
 * Write a partial segment covering every dirty vnode on the mount point;
 * if do_ckp is set, also flush the ifile and write the superblocks so the
 * result is a checkpoint.  Callers are expected to have marked the file
 * system busy (see the single-threading comment above).
 */
int
lfs_segwrite(mp, do_ckp)
	struct mount *mp;
	int do_ckp;			/* Do a checkpoint. */
{
	struct inode *ip;
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int error, islocked, s;

#ifdef VERBOSE
	printf("lfs_segwrite\n");
#endif
	fs = VFSTOUFS(mp)->um_lfs;

	/*
	 * Allocate a segment structure and enough space to hold pointers to
	 * the maximum possible number of buffers which can be described in a
	 * single summary block.
	 */
	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
	sp->seg_flags = do_ckp ? SEGM_CKP : 0;
	lfs_initseg(fs, sp);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.  If not a checkpoint, we never do the
	 * final decrement, avoiding the wakeup in the callback routine.
	 */
	s = splbio();
	++fs->lfs_iocount;
	splx(s);

loop:	for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
		/*
		 * If the vnode that we are about to sync is no longer
		 * associated with this mount point, start over.
		 */
		if (vp->v_mount != mp)
			goto loop;

		islocked = VOP_ISLOCKED(vp);

		/*
		 * XXX
		 * This is wrong, I think -- we should just wait until we
		 * get the vnode and go on.  Probably going to reschedule
		 * all of the writes we already scheduled...
		 */
		if (islocked)
			VREF(vp);
		else if (vget(vp))
		{
			printf("lfs_segment: failed to get vnode (tell Keith)!\n");
			goto loop;
		}
		/*
		 * Write the inode/file if dirty and it's not the
		 * the IFILE.
		 */
		ip = VTOI(vp);
		if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) ||
		    vp->v_dirtyblkhd != NULL) &&
		    ip->i_number != LFS_IFILE_INUM) {
			if (vp->v_dirtyblkhd != NULL)
				lfs_writefile(fs, sp, vp);
			lfs_writeinode(fs, sp, ip);
			/*
			 * NOTE(review): the test above reads ip->i_flag but
			 * this clears ip->i_flags -- likely a typo for
			 * i_flag; verify against struct inode.
			 */
			ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
		}
		if (islocked)
			vrele(vp);
		else
			vput(vp);
	}
	/* For a checkpoint, the ifile itself is flushed last. */
	if (do_ckp) {
		vp = fs->lfs_ivnode;
		/* Busy-wait for the ifile vnode; vget can't fail forever. */
		while (vget(vp));
		ip = VTOI(vp);
		if (vp->v_dirtyblkhd != NULL)
			lfs_writefile(fs, sp, vp);
		lfs_writeinode(fs, sp, ip);
		ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
		vput(vp);
	}
	lfs_writeseg(fs, sp);

	/*
	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
	 * moment, the user's process hangs around so we can sleep.
	 */
	s = splbio();
	--fs->lfs_iocount;
	if (do_ckp) {
		if (fs->lfs_iocount && (error =
		    tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) {
			/*
			 * NOTE(review): error return without splx(s) --
			 * leaves the processor at splbio(); verify.
			 */
			free(sp->bpp, M_SEGMENT);
			free(sp, M_SEGMENT);
			return (error);
		}
		splx(s);
		/* All data is on disk; now the checkpoint can be committed. */
		lfs_writesuper(fs, sp);
	} else
		splx(s);

	free(sp->bpp, M_SEGMENT);
	free(sp, M_SEGMENT);

	return (0);
}

/*
 * Write the dirty blocks associated with a vnode.
 */
void
lfs_writefile(fs, sp, vp)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
{
	struct buf *bp;
	struct finfo *fip;
	IFILE *ifp;

#ifdef VERBOSE
	printf("lfs_writefile\n");
#endif
	/* Close this partial segment if it cannot hold another file. */
	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo)) {
		lfs_writeseg(fs, sp);
		lfs_initseg(fs, sp);
	}
	/*
	 * Charge the summary for a FINFO header.  struct finfo embeds its
	 * first fi_blocks slot, and lfs_gather charges one daddr_t per
	 * gathered block, hence the sizeof(daddr_t) credit here.
	 */
	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);

	fip = sp->fip;
	fip->fi_nblocks = 0;
	fip->fi_ino = VTOI(vp)->i_number;
	/* The file's version number is taken from its ifile entry. */
	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
	fip->fi_version = ifp->if_version;
	brelse(bp);

	/*
	 * It may not be necessary to write the meta-data blocks at this point,
	 * as the roll-forward recovery code should be able to reconstruct the
	 * list.
	 */
	lfs_gather(fs, sp, vp, lfs_match_data);
	lfs_gather(fs, sp, vp, lfs_match_indir);
	lfs_gather(fs, sp, vp, lfs_match_dindir);
#ifdef TRIPLE
	lfs_gather(fs, sp, vp, lfs_match_tindir);
#endif

	fip = sp->fip;
#ifdef META
	printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
#endif
	/* If nothing was gathered, give the FINFO space back. */
	if (fip->fi_nblocks != 0) {
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->fip =
		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
		    sizeof(daddr_t) * (fip->fi_nblocks - 1));
	} else
		sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t);
}

/*
 * Copy an inode into the segment's current inode block, update its ifile
 * entry to point at the new location, and debit the segment that held the
 * inode's previous on-disk copy.
 */
void
lfs_writeinode(fs, sp, ip)
	struct lfs *fs;
	struct segment *sp;
	struct inode *ip;
{
	struct buf *bp, *ibp;
	IFILE *ifp;
	SEGUSE *sup;
	daddr_t daddr;
	ino_t ino;
	int ndx;

#ifdef VERBOSE
	printf("lfs_writeinode\n");
#endif
	/* Allocate a new inode block if necessary. */
	if (sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < fs->lfs_bsize ||
		    sp->sum_bytes_left < sizeof(daddr_t)) {
			lfs_writeseg(fs, sp);
			lfs_initseg(fs, sp);
		}

		/* Get next inode block. */
		daddr = fs->lfs_offset;
		fs->lfs_offset += fsbtodb(fs, 1);
		sp->ibp = *sp->cbpp++ =
		    lfs_newbuf(fs, daddr, fs->lfs_bsize);

		/* Set remaining space counters. */
		sp->seg_bytes_left -= fs->lfs_bsize;
		sp->sum_bytes_left -= sizeof(daddr_t);
		/*
		 * Inode block addresses are recorded as daddr_t's at the
		 * tail of the summary block, growing backward toward the
		 * FINFOs.
		 */
		ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
		    sp->ninodes / INOPB(fs) - 1;
		((daddr_t *)(sp->segsum))[ndx] = daddr;
	}

	/* Update the inode times and copy the inode onto the inode page. */
	ITIMES(ip, &time, &time);
	bp = sp->ibp;
	bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din;

	/* Increment inode count in segment summary block. */
	++((SEGSUM *)(sp->segsum))->ss_ninos;

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address and access times for this inode in the ifile.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM)
		fs->lfs_idaddr = bp->b_blkno;

	LFS_IENTRY(ifp, fs, ino, ibp);
	daddr = ifp->if_daddr;
	ifp->if_daddr = bp->b_blkno;
	LFS_UBWRITE(ibp);

	/* Debit the segment that held the inode's previous copy. */
	if (daddr != LFS_UNUSED_DADDR) {
		LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
		if (sup->su_nbytes < sizeof(struct dinode))
			/* XXX -- Change to a panic. */
			printf("lfs: negative bytes (segment %d)\n",
			    datosn(fs, daddr));
#endif
		sup->su_nbytes -= sizeof(struct dinode);
		LFS_UBWRITE(bp);
	}
}

/*
 * Walk a vnode's dirty block list at splbio(), adding every buffer the
 * match function accepts to the segment's buffer array and the current
 * FINFO, and assigning disk addresses via lfs_updatemeta.
 */
void
lfs_gather(fs, sp, vp, match)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int (*match) __P((struct lfs *, struct buf *));
{
	struct buf **bpp, *bp, *nbp;
	struct finfo *fip;
	struct inode *ip;
	daddr_t *lbp, *start_lbp;
	u_long version;
	int s;

#ifdef VERBOSE
	printf("lfs_gather\n");
#endif
	ip = VTOI(vp);
	bpp = sp->cbpp;
	fip = sp->fip;
	start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks];

	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		/*
		 * XXX
		 * Should sleep on any BUSY buffer if doing an fsync?
		 */
		if (bp->b_flags & B_BUSY || !match(fs, bp))
			continue;
#ifdef DIAGNOSTIC
		if (!(bp->b_flags & B_DELWRI))
			panic("lfs_gather: bp not B_DELWRI");
		if (!(bp->b_flags & B_LOCKED))
			panic("lfs_gather: bp not B_LOCKED");
#endif
		/*
		 * If full, finish this segment.  We may be doing I/O, so
		 * release and reacquire the splbio().
		 */
		if (sp->sum_bytes_left < sizeof(daddr_t) ||
		    sp->seg_bytes_left < fs->lfs_bsize) {
			splx(s);
			lfs_updatemeta(fs,
			    sp, vp, start_lbp, bpp, lbp - start_lbp);

			/* Add the current file to the segment summary. */
			++((SEGSUM *)(sp->segsum))->ss_nfinfo;

			/* Carry the file's FINFO over into the new segment. */
			version = fip->fi_version;
			lfs_writeseg(fs, sp);
			lfs_initseg(fs, sp);

			fip = sp->fip;
			fip->fi_version = version;
			fip->fi_ino = ip->i_number;
			start_lbp = lbp = fip->fi_blocks;

			sp->sum_bytes_left -=
			    sizeof(struct finfo) - sizeof(daddr_t);

			bpp = sp->cbpp;
			s = splbio();
		}

		/* Insert into the buffer list, update the FINFO block. */
		*sp->cbpp++ = bp;
		++fip->fi_nblocks;
		*lbp++ = bp->b_lblkno;

		sp->sum_bytes_left -= sizeof(daddr_t);
		sp->seg_bytes_left -= bp->b_bufsize;
	}
	splx(s);
	lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp);
}

/*
 * Update the metadata that points to the blocks listed in the FINFO
 * array.  Each gathered block (bpp/lbp, nblocks entries, sorted by
 * logical block number) is assigned the next disk address in the
 * segment; the direct, indirect, or doubly-indirect pointer that
 * references it is rewritten, and the segment that held the block's
 * previous copy is debited.
 */
void
lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	daddr_t *lbp;
	struct buf **bpp;
	int nblocks;
{
	SEGUSE *sup;
	struct buf *bp;
	INDIR a[NIADDR], *ap;
	struct inode *ip;
	daddr_t daddr, lbn, off;
	int db_per_fsb, error, i, num;

#ifdef VERBOSE
	printf("lfs_updatemeta\n");
#endif
	if (nblocks == 0)
		return;

	/* Sort the blocks. */
	lfs_shellsort(bpp, lbp, nblocks);

	/*
	 * Assign disk addresses, and update references to the logical
	 * block and the segment usage information.
	 */
	db_per_fsb = fsbtodb(fs, 1);
	for (i = nblocks; i--; ++bpp) {
		lbn = *lbp++;
		(*bpp)->b_blkno = off = fs->lfs_offset;
		fs->lfs_offset += db_per_fsb;

		/* Find the block's old address and its parent meta block. */
		if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num))
			panic("lfs_updatemeta: lfs_bmaparray %d", error);
		ip = VTOI(vp);
		switch (num) {
		case 0:
			/* Direct block: pointer lives in the inode. */
			ip->i_db[lbn] = off;
			break;
		case 1:
			/* Top-level indirect pointer in the inode. */
			ip->i_ib[a[0].in_off] = off;
			break;
		default:
			/* Pointer lives in an indirect block: rewrite it. */
			ap = &a[num - 1];
			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_updatemeta: bread bno %d",
				    ap->in_lbn);
			bp->b_un.b_daddr[ap->in_off] = off;
			lfs_bwrite(bp);
		}

		/* Update segment usage information. */
		if (daddr != UNASSIGNED) {
			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
#ifdef DIAGNOSTIC
			if (sup->su_nbytes < fs->lfs_bsize)
				/* XXX -- Change to a panic. */
				printf("lfs: negative bytes (segment %d)\n",
				    datosn(fs, daddr));
#endif
			sup->su_nbytes -= fs->lfs_bsize;
			LFS_UBWRITE(bp);
		}
	}
}

/*
 * Start a new segment.
 */
void
lfs_initseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	SEGUSE *sup;
	SEGSUM *ssp;
	struct buf *bp;
	daddr_t lbn, *lbnp;

#ifdef VERBOSE
	printf("lfs_initseg\n");
#endif
	/* Advance to the next segment. */
	if (!LFS_PARTIAL_FITS(fs)) {
		/* Wake up any cleaning procs waiting on this file system. */
		wakeup(&fs->lfs_nextseg);
		wakeup(&lfs_allclean_wakeup);

		lfs_newseg(fs);
		fs->lfs_offset = fs->lfs_curseg;
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;

		/*
		 * If the segment contains a superblock, update the offset
		 * and summary address to skip over it.
		 */
		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
			sp->seg_bytes_left -= LFS_SBPAD;
		}
		brelse(bp);
	} else {
		/* A partial fits: continue within the current segment. */
		sp->seg_number = datosn(fs, fs->lfs_curseg);
		sp->seg_bytes_left = (fs->lfs_dbpseg -
		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
	}

	sp->ibp = NULL;
	sp->ninodes = 0;

	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
	sp->cbpp = sp->bpp;
	*sp->cbpp = lfs_newbuf(fs, fs->lfs_offset, LFS_SUMMARY_SIZE);
	sp->segsum = (*sp->cbpp)->b_un.b_addr;
	++sp->cbpp;
	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;

	/* Set point to SEGSUM, initialize it. */
	ssp = sp->segsum;
	ssp->ss_next = fs->lfs_nextseg;
	ssp->ss_nfinfo = ssp->ss_ninos = 0;

	/*
	 * Set pointer to first FINFO, initialize it.
	 * NOTE(review): arithmetic on the void * segsum relies on the
	 * compiler treating void * like char *; a (caddr_t) cast would be
	 * more portable.
	 */
	sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
	sp->fip->fi_nblocks = 0;

	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
}

/*
 * Return the next segment to write.
 */
void
lfs_newseg(fs)
	struct lfs *fs;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	int curseg, isdirty, sn;

#ifdef VERBOSE
	printf("lfs_newseg\n");
#endif
	/*
	 * Turn off the active bit for the current segment, turn on the
	 * active and dirty bits for the next segment, update the cleaner
	 * info.  Set the current segment to the next segment, get a new
	 * next segment.
	 */
	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp);
	sup->su_flags &= ~SEGUSE_ACTIVE;
	LFS_UBWRITE(bp);

	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
	sup->su_flags |= SEGUSE_ACTIVE | SEGUSE_DIRTY;
	LFS_UBWRITE(bp);

	LFS_CLEANERINFO(cip, fs, bp);
	--cip->clean;
	++cip->dirty;
	LFS_UBWRITE(bp);

	fs->lfs_lastseg = fs->lfs_curseg;
	fs->lfs_curseg = fs->lfs_nextseg;
	/* Scan forward (with wraparound) for the next clean segment. */
	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
		sn = (sn + 1) % fs->lfs_nseg;
		if (sn == curseg)
			panic("lfs_nextseg: no clean segments");
		LFS_SEGENTRY(sup, fs, sn, bp);
		isdirty = sup->su_flags & SEGUSE_DIRTY;
		brelse(bp);
		if (!isdirty)
			break;
	}
	fs->lfs_nextseg = sntoda(fs, sn);
}

/*
 * Checksum the accumulated partial segment and push its buffers to disk
 * in large contiguous chunks, then credit the segment usage table with
 * the bytes written.
 */
void
lfs_writeseg(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	struct buf **bpp, *bp, *cbp;
	SEGUSE *sup;
	SEGSUM *ssp;
	dev_t i_dev;
	u_long *datap, *dp;
	size_t size;
	int ch_per_blk, i, nblocks, num, s, (*strategy)__P((struct buf *));
	char *p;

#ifdef VERBOSE
	printf("lfs_writeseg\n");
#endif
	/* An empty partial (summary block only) is not written. */
	if ((nblocks = sp->cbpp - sp->bpp) == 0)
		return;

	/*
	 * Compute checksum across data and then across summary; the first
	 * block (the summary block) is skipped.  Set the create time here
	 * so that it's guaranteed to be later than the inode mod times.
	 *
	 * XXX
	 * Fix this to do it inline, instead of malloc/copy.
	 */
	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
	for (bpp = sp->bpp, i = nblocks - 1; i--;)
		*dp++ = (*++bpp)->b_un.b_words[0];
	ssp = (SEGSUM *)sp->segsum;
	ssp->ss_create = time.tv_sec;
	/*
	 * NOTE(review): only nblocks - 1 words were copied into datap above
	 * (the summary block is skipped), yet the checksum length below
	 * covers nblocks words, so the final word read is uninitialized --
	 * verify whether this should be (nblocks - 1) * sizeof(u_long).
	 */
	ssp->ss_datasum = cksum(datap, nblocks * sizeof(u_long));
	ssp->ss_sumsum =
	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
	free(datap, M_SEGMENT);

	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy;

	/*
	 * When we simply write the blocks we lose a rotation for every block
	 * written.  To avoid this problem, we allocate memory in chunks, copy
	 * the buffers into the chunk and write the chunk.  56K was chosen as
	 * some driver/controllers can't handle unsigned 16 bit transfers.
	 * When the data is copied to the chunk, turn off the the B_LOCKED bit
	 * and brelse the buffer (which will move them to the LRU list).  Add
	 * the B_CALL flag to the buffer header so we can count I/O's for the
	 * checkpoints and so we can release the allocated memory.
	 *
	 * XXX
	 * This should be removed if the new virtual memory system allows us to
	 * easily make the buffers contiguous in kernel memory and if that's
	 * fast enough.
	 */
#define	LFS_CHUNKSIZE	(56 * 1024)
	ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize;
	for (bpp = sp->bpp, i = nblocks; i;) {
		num = ch_per_blk;
		if (num > i)
			num = i;
		i -= num;
		size = num * fs->lfs_bsize;

		/* Chunk header: async write, lfs_callback frees the copy. */
		cbp = lfs_newbuf(fs, (*bpp)->b_blkno, 0);
		cbp->b_dev = i_dev;
		cbp->b_flags = B_ASYNC | B_BUSY | B_CALL;
		cbp->b_iodone = lfs_callback;
		cbp->b_saveaddr = cbp->b_un.b_addr;
		cbp->b_un.b_addr = malloc(size, M_SEGMENT, M_WAITOK);

		s = splbio();
		++fs->lfs_iocount;
		for (p = cbp->b_un.b_addr; num--;) {
			bp = *bpp++;
			bcopy(bp->b_un.b_addr, p, bp->b_bcount);
			p += bp->b_bcount;
			bp->b_flags &=
			    ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED);
			/*
			 * LFS-private buffers (lfs_newbuf sets B_NOCACHE) are
			 * not on the free list, so only real cache buffers
			 * are removed and reassigned here.
			 */
			if (!(bp->b_flags & B_NOCACHE)) {
				bremfree(bp);
				reassignbuf(bp, bp->b_vp);
			}
			brelse(bp);
		}
		splx(s);
		cbp->b_bcount = p - cbp->b_un.b_addr;
		(strategy)(cbp);
	}

	/*
	 * Update the segment usage information.  Note that `-' binds
	 * tighter than `<<', so the data-block count (total blocks minus
	 * the summary and inode blocks) is what gets scaled to bytes.
	 */
	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
	sup->su_nbytes += nblocks - 1 -
	    (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs) << fs->lfs_bshift;
	sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode);
	sup->su_lastmod = time.tv_sec;
	LFS_UBWRITE(bp);
}

/*
 * Checksum the in-core superblock and write it to both superblock
 * locations: the first synchronously, the second asynchronously.
 */
void
lfs_writesuper(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	struct buf *bp;
	dev_t i_dev;
	int (*strategy) __P((struct buf *));

#ifdef VERBOSE
	printf("lfs_writesuper\n");
#endif
	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy;

	/* Checksum the superblock and copy it into a buffer. */
	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
	bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD);
	*bp->b_un.b_lfs = *fs;

	/* Write the first superblock (wait). */
	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	(strategy)(bp);
	biowait(bp);

	/* Write the second superblock (don't wait). */
	bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
	bp->b_flags |= B_ASYNC | B_BUSY;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	(strategy)(bp);
}

/*
 * Logical block number match routines used when traversing the dirty block
 * chain.  Data blocks have non-negative lbns; meta-data blocks are given
 * negative lbns, classified here by their remainder modulo NINDIR(fs).
 */
int
lfs_match_data(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	return (bp->b_lblkno >= 0);
}

int
lfs_match_indir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
}

int
lfs_match_dindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
}

int
lfs_match_tindir(fs, bp)
	struct lfs *fs;
	struct buf *bp;
{
	int lbn;

	lbn = bp->b_lblkno;
	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
}

/*
 * Allocate a new buffer header.
857 */ 858 struct buf * 859 lfs_newbuf(fs, daddr, size) 860 struct lfs *fs; 861 daddr_t daddr; 862 size_t size; 863 { 864 struct buf *bp; 865 866 #ifdef VERBOSE 867 printf("lfs_newbuf\n"); 868 #endif 869 bp = getnewbuf(); 870 bremhash(bp); 871 bgetvp(fs->lfs_ivnode, bp); 872 bp->b_bcount = 0; 873 bp->b_lblkno = daddr; 874 bp->b_blkno = daddr; 875 bp->b_error = 0; 876 bp->b_resid = 0; 877 if (size) 878 allocbuf(bp, size); 879 bp->b_flags |= B_NOCACHE; 880 bp->b_saveaddr = NULL; 881 binshash(bp, &bfreelist[BQ_AGE]); 882 return (bp); 883 } 884 885 int /* XXX should be void */ 886 lfs_callback(bp) 887 struct buf *bp; 888 { 889 struct lfs *fs; 890 891 fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; 892 #ifdef DIAGNOSTIC 893 if (fs->lfs_iocount == 0) 894 panic("lfs_callback: zero iocount\n"); 895 #endif 896 if (--fs->lfs_iocount == 0) 897 wakeup(&fs->lfs_iocount); 898 899 if (bp->b_saveaddr) { 900 free(bp->b_un.b_addr, M_SEGMENT); 901 bp->b_un.b_addr = bp->b_saveaddr; 902 bp->b_saveaddr = NULL; 903 } 904 brelse(bp); 905 } 906 907 /* 908 * Shellsort (diminishing increment sort) from Data Structures and 909 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 910 * see also Knuth Vol. 3, page 84. The increments are selected from 911 * formula (8), page 95. Roughly O(N^3/2). 912 */ 913 /* 914 * This is our own private copy of shellsort because we want to sort 915 * two parallel arrays (the array of buffer pointers and the array of 916 * logical block numbers) simultaneously. Note that we cast the array 917 * of logical block numbers to a unsigned in this routine so that the 918 * negative block numbers (meta data blocks) sort AFTER the data blocks. 
919 */ 920 void 921 lfs_shellsort(bp_array, lb_array, nmemb) 922 struct buf **bp_array; 923 daddr_t *lb_array; 924 register int nmemb; 925 { 926 static int __rsshell_increments[] = { 4, 1, 0 }; 927 register int incr, *incrp, t1, t2; 928 struct buf *bp_temp; 929 u_long lb_temp; 930 931 for (incrp = __rsshell_increments; incr = *incrp++;) 932 for (t1 = incr; t1 < nmemb; ++t1) 933 for (t2 = t1 - incr; t2 >= 0;) 934 if (lb_array[t2] > lb_array[t2 + incr]) { 935 lb_temp = lb_array[t2]; 936 lb_array[t2] = lb_array[t2 + incr]; 937 lb_array[t2 + incr] = lb_temp; 938 bp_temp = bp_array[t2]; 939 bp_array[t2] = bp_array[t2 + incr]; 940 bp_array[t2 + incr] = bp_temp; 941 t2 -= incr; 942 } else 943 break; 944 } 945