/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_segment.c	7.26 (Berkeley) 07/25/92
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/mount.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/fifofs/fifo.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

/* In-memory description of a segment about to be written. */
struct segment {
	struct buf **bpp;		/* pointer to buffer array */
	struct buf **cbpp;		/* pointer to next available bp */
	struct buf *ibp;		/* buffer pointer to inode page */
	struct finfo *fip;		/* current fileinfo pointer */
	void *segsum;			/* segment summary info */
	u_long ninodes;			/* number of inodes in this segment */
	u_long seg_bytes_left;		/* bytes left in segment */
	u_long sum_bytes_left;		/* bytes left in summary block */
	u_long seg_number;		/* number of this segment */
#define	SEGM_CKP	0x01		/* doing a checkpoint */
	u_long seg_flags;		/* run-time flags for this segment */
};

/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.
 *
 * True when the unwritten disk blocks remaining in the current segment
 * (lfs_dbpseg minus the blocks already consumed past lfs_curseg) exceed
 * one file-system block (1 << lfs_fsbtodb disk blocks).
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)

void	 lfs_callback __P((struct buf *));
void	 lfs_gather __P((struct lfs *, struct segment *,
	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
void	 lfs_initseg __P((struct lfs *, struct segment *));
void	 lfs_iset __P((struct inode *, daddr_t, time_t));
int	 lfs_match_data __P((struct lfs *, struct buf *));
int	 lfs_match_dindir __P((struct lfs *, struct buf *));
int	 lfs_match_indir __P((struct lfs *, struct buf *));
int	 lfs_match_tindir __P((struct lfs *, struct buf *));
struct buf *
	 lfs_newbuf __P((struct lfs *, daddr_t, size_t));
void	 lfs_newseg __P((struct lfs *));
void	 lfs_shellsort __P((struct buf **, daddr_t *, register int));
void	 lfs_updatemeta __P((struct lfs *,
	     struct segment *, struct vnode *, daddr_t *, struct buf **, int));
void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
int	 lfs_writeseg __P((struct lfs *, struct segment *));
void	 lfs_writesuper __P((struct lfs *, struct segment *));
void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
	     struct segment *sp, int dirops));

int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */

/*
 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
 * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
 * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
 * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
87 */ 88 89 int 90 lfs_vflush(vp) 91 struct vnode *vp; 92 { 93 struct inode *ip; 94 struct lfs *fs; 95 struct segment *sp; 96 int error, s; 97 98 #ifdef VERBOSE 99 printf("lfs_vflush\n"); 100 #endif 101 fs = VFSTOUFS(vp->v_mount)->um_lfs; 102 lfs_seglock(fs); 103 104 /* 105 * Allocate a segment structure and enough space to hold pointers to 106 * the maximum possible number of buffers which can be described in a 107 * single summary block. 108 */ 109 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 110 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 111 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 112 sp->seg_flags = SEGM_CKP; 113 lfs_initseg(fs, sp); 114 115 /* 116 * Keep a cumulative count of the outstanding I/O operations. If the 117 * disk drive catches up with us it could go to zero before we finish, 118 * so we artificially increment it by one until we've scheduled all of 119 * the writes we intend to do. 120 */ 121 s = splbio(); 122 ++fs->lfs_iocount; 123 splx(s); 124 125 ip = VTOI(vp); 126 do { 127 do { 128 if (vp->v_dirtyblkhd != NULL) 129 lfs_writefile(fs, sp, vp); 130 } while (lfs_writeinode(fs, sp, ip)); 131 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 132 133 } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM); 134 135 /* 136 * If the I/O count is non-zero, sleep until it reaches zero. At the 137 * moment, the user's process hangs around so we can sleep. 138 */ 139 s = splbio(); 140 if (--fs->lfs_iocount && (error = 141 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) { 142 free(sp->bpp, M_SEGMENT); 143 free(sp, M_SEGMENT); 144 return (error); 145 } 146 splx(s); 147 lfs_segunlock(fs); 148 149 /* 150 * XXX 151 * Should be writing a checkpoint? 
152 */ 153 free(sp->bpp, M_SEGMENT); 154 free(sp, M_SEGMENT); 155 156 return (0); 157 } 158 159 void 160 lfs_writevnodes(fs, mp, sp, dirops) 161 struct lfs *fs; 162 struct mount *mp; 163 struct segment *sp; 164 int dirops; 165 { 166 struct inode *ip; 167 struct vnode *vp; 168 int error, s; 169 170 loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 171 /* 172 * If the vnode that we are about to sync is no longer 173 * associated with this mount point, start over. 174 */ 175 if (vp->v_mount != mp) 176 goto loop; 177 178 if (dirops && !(vp->v_flag & VDIROP) || 179 !dirops && (vp->v_flag & VDIROP)) 180 continue; 181 /* 182 * XXX 183 * Up the ref count so we don't get tossed out of 184 * memory. 185 */ 186 VREF(vp); 187 188 /* 189 * Write the inode/file if dirty and it's not the 190 * the IFILE. 191 */ 192 ip = VTOI(vp); 193 if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) || 194 vp->v_dirtyblkhd != NULL) && 195 ip->i_number != LFS_IFILE_INUM) { 196 if (vp->v_dirtyblkhd != NULL) 197 lfs_writefile(fs, sp, vp); 198 (void) lfs_writeinode(fs, sp, ip); 199 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 200 } 201 vp->v_flag &= ~VDIROP; 202 vrele(vp); 203 } 204 } 205 206 int 207 lfs_segwrite(mp, do_ckp) 208 struct mount *mp; 209 int do_ckp; /* Do a checkpoint. */ 210 { 211 struct buf *bp; 212 struct inode *ip; 213 struct lfs *fs; 214 struct segment *sp; 215 struct vnode *vp; 216 SEGUSE *segusep; 217 daddr_t ibno; 218 int error, i, s; 219 220 #ifdef VERBOSE 221 printf("lfs_segwrite\n"); 222 #endif 223 fs = VFSTOUFS(mp)->um_lfs; 224 lfs_seglock(fs); 225 226 /* 227 * Allocate a segment structure and enough space to hold pointers to 228 * the maximum possible number of buffers which can be described in a 229 * single summary block. 230 */ 231 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 232 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 233 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 234 sp->seg_flags = do_ckp ? 
SEGM_CKP : 0; 235 lfs_initseg(fs, sp); 236 237 /* 238 * Keep a cumulative count of the outstanding I/O operations. If the 239 * disk drive catches up with us it could go to zero before we finish, 240 * so we artificially increment it by one until we've scheduled all of 241 * the writes we intend to do. If not a checkpoint, we never do the 242 * final decrement, avoiding the wakeup in the callback routine. 243 */ 244 s = splbio(); 245 ++fs->lfs_iocount; 246 splx(s); 247 248 lfs_writevnodes(fs, mp, sp, 0); 249 fs->lfs_writer = 1; 250 if (fs->lfs_dirops && (error = 251 tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) { 252 free(sp->bpp, M_SEGMENT); 253 free(sp, M_SEGMENT); 254 fs->lfs_writer = 0; 255 return (error); 256 } 257 258 lfs_writevnodes(fs, mp, sp, 1); 259 260 /* 261 * If we are doing a checkpoint, mark everything since the 262 * last checkpoint as no longer ACTIVE. 263 */ 264 if (do_ckp) 265 for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz; 266 --ibno >= fs->lfs_cleansz; ) { 267 if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize, 268 NOCRED, &bp)) 269 270 panic("lfs: ifile read"); 271 segusep = (SEGUSE *)bp->b_un.b_addr; 272 for (i = fs->lfs_sepb; i--; segusep++) 273 segusep->su_flags &= ~SEGUSE_ACTIVE; 274 275 LFS_UBWRITE(bp); 276 } 277 278 if (do_ckp || fs->lfs_doifile) { 279 vp = fs->lfs_ivnode; 280 while (vget(vp)); 281 ip = VTOI(vp); 282 if (vp->v_dirtyblkhd != NULL) 283 lfs_writefile(fs, sp, vp); 284 (void)lfs_writeinode(fs, sp, ip); 285 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 286 vput(vp); 287 /* 288 * This should never happen because we just guaranteed 289 * that all the segment usage table blocks are dirty, so 290 * no new ones should get written. 291 */ 292 if (lfs_writeseg(fs, sp) && do_ckp) 293 panic("lfs_segwrite: created dirty blocks on ckp"); 294 } else 295 (void) lfs_writeseg(fs, sp); 296 297 /* 298 * If the I/O count is non-zero, sleep until it reaches zero. At the 299 * moment, the user's process hangs around so we can sleep. 
300 */ 301 fs->lfs_writer = 0; 302 fs->lfs_doifile = 0; 303 wakeup(&fs->lfs_dirops); 304 305 s = splbio(); 306 --fs->lfs_iocount; 307 if (do_ckp) { 308 if (fs->lfs_iocount && (error = 309 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) { 310 free(sp->bpp, M_SEGMENT); 311 free(sp, M_SEGMENT); 312 return (error); 313 } 314 splx(s); 315 lfs_writesuper(fs, sp); 316 } else 317 splx(s); 318 319 lfs_segunlock(fs); 320 321 free(sp->bpp, M_SEGMENT); 322 free(sp, M_SEGMENT); 323 324 return (0); 325 } 326 327 /* 328 * Write the dirty blocks associated with a vnode. 329 */ 330 void 331 lfs_writefile(fs, sp, vp) 332 struct lfs *fs; 333 struct segment *sp; 334 struct vnode *vp; 335 { 336 struct buf *bp; 337 struct finfo *fip; 338 IFILE *ifp; 339 340 #ifdef VERBOSE 341 printf("lfs_writefile\n"); 342 #endif 343 if (sp->seg_bytes_left < fs->lfs_bsize || 344 sp->sum_bytes_left < sizeof(struct finfo)) { 345 (void) lfs_writeseg(fs, sp); 346 lfs_initseg(fs, sp); 347 } 348 sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t); 349 350 fip = sp->fip; 351 fip->fi_nblocks = 0; 352 fip->fi_ino = VTOI(vp)->i_number; 353 LFS_IENTRY(ifp, fs, fip->fi_ino, bp); 354 fip->fi_version = ifp->if_version; 355 brelse(bp); 356 357 /* 358 * It may not be necessary to write the meta-data blocks at this point, 359 * as the roll-forward recovery code should be able to reconstruct the 360 * list. 
361 */ 362 lfs_gather(fs, sp, vp, lfs_match_data); 363 lfs_gather(fs, sp, vp, lfs_match_indir); 364 lfs_gather(fs, sp, vp, lfs_match_dindir); 365 #ifdef TRIPLE 366 lfs_gather(fs, sp, vp, lfs_match_tindir); 367 #endif 368 369 fip = sp->fip; 370 #ifdef META 371 printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks); 372 #endif 373 if (fip->fi_nblocks != 0) { 374 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 375 sp->fip = 376 (struct finfo *)((caddr_t)fip + sizeof(struct finfo) + 377 sizeof(daddr_t) * (fip->fi_nblocks - 1)); 378 } else 379 sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t); 380 } 381 382 int 383 lfs_writeinode(fs, sp, ip) 384 struct lfs *fs; 385 struct segment *sp; 386 struct inode *ip; 387 { 388 struct buf *bp, *ibp; 389 IFILE *ifp; 390 SEGUSE *sup; 391 daddr_t daddr; 392 ino_t ino; 393 int ndx; 394 int redo_ifile = 0; 395 396 #ifdef VERBOSE 397 printf("lfs_writeinode\n"); 398 #endif 399 /* Allocate a new inode block if necessary. */ 400 if (sp->ibp == NULL) { 401 /* Allocate a new segment if necessary. */ 402 if (sp->seg_bytes_left < fs->lfs_bsize || 403 sp->sum_bytes_left < sizeof(daddr_t)) { 404 (void) lfs_writeseg(fs, sp); 405 lfs_initseg(fs, sp); 406 } 407 408 /* Get next inode block. */ 409 daddr = fs->lfs_offset; 410 fs->lfs_offset += fsbtodb(fs, 1); 411 sp->ibp = *sp->cbpp++ = 412 lfs_newbuf(fs, daddr, fs->lfs_bsize); 413 414 /* Set remaining space counters. */ 415 sp->seg_bytes_left -= fs->lfs_bsize; 416 sp->sum_bytes_left -= sizeof(daddr_t); 417 ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - 418 sp->ninodes / INOPB(fs) - 1; 419 ((daddr_t *)(sp->segsum))[ndx] = daddr; 420 } 421 422 /* Update the inode times and copy the inode onto the inode page. */ 423 ITIMES(ip, &time, &time); 424 bp = sp->ibp; 425 bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din; 426 427 /* Increment inode count in segment summary block. */ 428 ++((SEGSUM *)(sp->segsum))->ss_ninos; 429 430 /* If this page is full, set flag to allocate a new page. 
*/ 431 if (++sp->ninodes % INOPB(fs) == 0) 432 sp->ibp = NULL; 433 434 /* 435 * If updating the ifile, update the super-block. Update the disk 436 * address and access times for this inode in the ifile. 437 */ 438 ino = ip->i_number; 439 if (ino == LFS_IFILE_INUM) { 440 daddr = fs->lfs_idaddr; 441 fs->lfs_idaddr = bp->b_blkno; 442 } else { 443 LFS_IENTRY(ifp, fs, ino, ibp); 444 daddr = ifp->if_daddr; 445 ifp->if_daddr = bp->b_blkno; 446 LFS_UBWRITE(ibp); 447 } 448 449 /* 450 * No need to update segment usage if there was no former inode address 451 * or if the last inode address is in the current partial segment. 452 */ 453 if (daddr != LFS_UNUSED_DADDR && 454 !(daddr >= fs->lfs_curseg && daddr <= ifp->if_daddr) ) { 455 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 456 #ifdef DIAGNOSTIC 457 if (sup->su_nbytes < sizeof(struct dinode)) { 458 /* XXX -- Change to a panic. */ 459 printf("lfs: negative bytes (segment %d)\n", 460 datosn(fs, daddr)); 461 panic("negative bytes"); 462 } 463 #endif 464 sup->su_nbytes -= sizeof(struct dinode); 465 LFS_UBWRITE(bp); 466 redo_ifile = (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 467 } 468 return (redo_ifile); 469 } 470 471 void 472 lfs_gather(fs, sp, vp, match) 473 struct lfs *fs; 474 struct segment *sp; 475 struct vnode *vp; 476 int (*match) __P((struct lfs *, struct buf *)); 477 { 478 struct buf **bpp, *bp; 479 struct buf *lastbp; 480 struct finfo *fip; 481 struct inode *ip; 482 daddr_t *lbp, *start_lbp; 483 u_long version; 484 int s; 485 486 #ifdef VERBOSE 487 printf("lfs_gather\n"); 488 #endif 489 ip = VTOI(vp); 490 bpp = sp->cbpp; 491 fip = sp->fip; 492 start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; 493 494 loop: s = splbio(); 495 lastbp = NULL; 496 for (bp = vp->v_dirtyblkhd; bp; lastbp = bp, bp = bp->b_blockf) { 497 if (bp->b_flags & B_BUSY || !match(fs, bp) || 498 bp->b_flags & B_GATHERED) 499 continue; 500 #ifdef DIAGNOSTIC 501 if (!(bp->b_flags & B_DELWRI)) 502 panic("lfs_gather: bp not B_DELWRI"); 503 if 
(!(bp->b_flags & B_LOCKED)) 504 panic("lfs_gather: bp not B_LOCKED"); 505 #endif 506 /* 507 * If full, finish this segment. We may be doing I/O, so 508 * release and reacquire the splbio(). 509 */ 510 if (sp->sum_bytes_left < sizeof(daddr_t) || 511 sp->seg_bytes_left < fs->lfs_bsize) { 512 splx(s); 513 lfs_updatemeta(fs, 514 sp, vp, start_lbp, bpp, lbp - start_lbp); 515 516 /* Add the current file to the segment summary. */ 517 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 518 519 version = fip->fi_version; 520 (void) lfs_writeseg(fs, sp); 521 lfs_initseg(fs, sp); 522 523 fip = sp->fip; 524 fip->fi_version = version; 525 fip->fi_ino = ip->i_number; 526 start_lbp = lbp = fip->fi_blocks; 527 528 sp->sum_bytes_left -= 529 sizeof(struct finfo) - sizeof(daddr_t); 530 531 bpp = sp->cbpp; 532 goto loop; 533 } 534 535 /* Insert into the buffer list, update the FINFO block. */ 536 bp->b_flags |= B_GATHERED; 537 *sp->cbpp++ = bp; 538 ++fip->fi_nblocks; 539 *lbp++ = bp->b_lblkno; 540 541 sp->sum_bytes_left -= sizeof(daddr_t); 542 sp->seg_bytes_left -= bp->b_bufsize; 543 } 544 splx(s); 545 lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp); 546 } 547 548 /* 549 * Update the metadata that points to the blocks listed in the FINFO 550 * array. 551 */ 552 void 553 lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks) 554 struct lfs *fs; 555 struct segment *sp; 556 struct vnode *vp; 557 daddr_t *lbp; 558 struct buf **bpp; 559 int nblocks; 560 { 561 SEGUSE *sup; 562 struct buf *bp; 563 INDIR a[NIADDR], *ap; 564 struct inode *ip; 565 daddr_t daddr, lbn, off; 566 int db_per_fsb, error, i, num; 567 568 #ifdef VERBOSE 569 printf("lfs_updatemeta\n"); 570 #endif 571 if (nblocks == 0) 572 return; 573 574 /* Sort the blocks. */ 575 lfs_shellsort(bpp, lbp, nblocks); 576 577 /* 578 * Assign disk addresses, and update references to the logical 579 * block and the segment usage information. 
580 */ 581 db_per_fsb = fsbtodb(fs, 1); 582 for (i = nblocks; i--; ++bpp) { 583 lbn = *lbp++; 584 (*bpp)->b_blkno = off = fs->lfs_offset; 585 fs->lfs_offset += db_per_fsb; 586 587 if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num)) 588 panic("lfs_updatemeta: lfs_bmaparray %d", error); 589 ip = VTOI(vp); 590 switch (num) { 591 case 0: 592 ip->i_db[lbn] = off; 593 break; 594 case 1: 595 ip->i_ib[a[0].in_off] = off; 596 break; 597 default: 598 ap = &a[num - 1]; 599 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 600 panic("lfs_updatemeta: bread bno %d", 601 ap->in_lbn); 602 /* 603 * Bread may create a new indirect block which needs 604 * to get counted for the inode. 605 */ 606 if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) { 607 ip->i_blocks += btodb(fs->lfs_bsize); 608 fs->lfs_bfree -= btodb(fs->lfs_bsize); 609 } 610 bp->b_un.b_daddr[ap->in_off] = off; 611 VOP_BWRITE(bp); 612 } 613 614 /* Update segment usage information. */ 615 if (daddr != UNASSIGNED) { 616 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 617 #ifdef DIAGNOSTIC 618 if (sup->su_nbytes < fs->lfs_bsize) { 619 /* XXX -- Change to a panic. */ 620 printf("lfs: negative bytes (segment %d)\n", 621 datosn(fs, daddr)); 622 panic ("Negative Bytes"); 623 } 624 #endif 625 sup->su_nbytes -= fs->lfs_bsize; 626 LFS_UBWRITE(bp); 627 } 628 } 629 } 630 631 /* 632 * Start a new segment. 633 */ 634 void 635 lfs_initseg(fs, sp) 636 struct lfs *fs; 637 struct segment *sp; 638 { 639 SEGUSE *sup; 640 SEGSUM *ssp; 641 struct buf *bp; 642 daddr_t lbn, *lbnp; 643 644 #ifdef VERBOSE 645 printf("lfs_initseg\n"); 646 #endif 647 /* Advance to the next segment. */ 648 if (!LFS_PARTIAL_FITS(fs)) { 649 /* Wake up any cleaning procs waiting on this file system. 
*/ 650 wakeup(&fs->lfs_nextseg); 651 wakeup(&lfs_allclean_wakeup); 652 653 lfs_newseg(fs); 654 fs->lfs_offset = fs->lfs_curseg; 655 sp->seg_number = datosn(fs, fs->lfs_curseg); 656 sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; 657 658 /* 659 * If the segment contains a superblock, update the offset 660 * and summary address to skip over it. 661 */ 662 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 663 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 664 fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; 665 sp->seg_bytes_left -= LFS_SBPAD; 666 } 667 brelse(bp); 668 } else { 669 sp->seg_number = datosn(fs, fs->lfs_curseg); 670 sp->seg_bytes_left = (fs->lfs_dbpseg - 671 (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; 672 } 673 fs->lfs_lastpseg = fs->lfs_offset; 674 675 sp->ibp = NULL; 676 sp->ninodes = 0; 677 678 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 679 sp->cbpp = sp->bpp; 680 *sp->cbpp = lfs_newbuf(fs, fs->lfs_offset, LFS_SUMMARY_SIZE); 681 sp->segsum = (*sp->cbpp)->b_un.b_addr; 682 ++sp->cbpp; 683 fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; 684 685 /* Set point to SEGSUM, initialize it. */ 686 ssp = sp->segsum; 687 ssp->ss_next = fs->lfs_nextseg; 688 ssp->ss_nfinfo = ssp->ss_ninos = 0; 689 690 /* Set pointer to first FINFO, initialize it. */ 691 sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM)); 692 sp->fip->fi_nblocks = 0; 693 694 sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 695 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 696 } 697 698 /* 699 * Return the next segment to write. 
700 */ 701 void 702 lfs_newseg(fs) 703 struct lfs *fs; 704 { 705 CLEANERINFO *cip; 706 SEGUSE *sup; 707 struct buf *bp; 708 int curseg, isdirty, sn; 709 710 #ifdef VERBOSE 711 printf("lfs_newseg\n"); 712 #endif 713 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp); 714 sup->su_flags |= SEGUSE_DIRTY; 715 LFS_UBWRITE(bp); 716 717 LFS_CLEANERINFO(cip, fs, bp); 718 --cip->clean; 719 ++cip->dirty; 720 LFS_UBWRITE(bp); 721 722 fs->lfs_lastseg = fs->lfs_curseg; 723 fs->lfs_curseg = fs->lfs_nextseg; 724 for (sn = curseg = datosn(fs, fs->lfs_curseg);;) { 725 sn = (sn + 1) % fs->lfs_nseg; 726 if (sn == curseg) 727 panic("lfs_nextseg: no clean segments"); 728 LFS_SEGENTRY(sup, fs, sn, bp); 729 isdirty = sup->su_flags & SEGUSE_DIRTY; 730 brelse(bp); 731 if (!isdirty) 732 break; 733 } 734 735 fs->lfs_nextseg = sntoda(fs, sn); 736 } 737 738 int 739 lfs_writeseg(fs, sp) 740 struct lfs *fs; 741 struct segment *sp; 742 { 743 struct buf **bpp, *bp, *cbp; 744 SEGUSE *sup; 745 SEGSUM *ssp; 746 dev_t i_dev; 747 size_t size; 748 u_long *datap, *dp; 749 int ch_per_blk, do_again, i, nblocks, num, s; 750 int (*strategy)__P((struct vop_strategy_args *)); 751 struct vop_strategy_args vop_strategy_a; 752 u_short ninos; 753 char *p; 754 755 #ifdef VERBOSE 756 printf("lfs_writeseg\n"); 757 #endif 758 /* Checkpoint always writes superblock, even if no data blocks. */ 759 if ((nblocks = sp->cbpp - sp->bpp) == 0 && !(sp->seg_flags & SEGM_CKP)) 760 return (0); 761 762 /* 763 * Compute checksum across data and then across summary; the first 764 * block (the summary block) is skipped. Set the create time here 765 * so that it's guaranteed to be later than the inode mod times. 766 * 767 * XXX 768 * Fix this to do it inline, instead of malloc/copy. 
769 */ 770 datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); 771 for (bpp = sp->bpp, i = nblocks - 1; i--;) 772 *dp++ = (*++bpp)->b_un.b_words[0]; 773 ssp = (SEGSUM *)sp->segsum; 774 ssp->ss_create = time.tv_sec; 775 ssp->ss_datasum = cksum(datap, nblocks * sizeof(u_long)); 776 ssp->ss_sumsum = 777 cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum)); 778 free(datap, M_SEGMENT); 779 780 /* Update the segment usage information. */ 781 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 782 ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs); 783 sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift; 784 sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode); 785 sup->su_lastmod = time.tv_sec; 786 sup->su_flags |= SEGUSE_ACTIVE; 787 sup->su_ninos += ninos; 788 ++sup->su_nsums; 789 LFS_UBWRITE(bp); 790 fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE); 791 do_again = !(bp->b_flags & B_GATHERED); 792 793 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 794 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 795 796 /* 797 * When we simply write the blocks we lose a rotation for every block 798 * written. To avoid this problem, we allocate memory in chunks, copy 799 * the buffers into the chunk and write the chunk. 56K was chosen as 800 * some driver/controllers can't handle unsigned 16 bit transfers. 801 * When the data is copied to the chunk, turn off the the B_LOCKED bit 802 * and brelse the buffer (which will move them to the LRU list). Add 803 * the B_CALL flag to the buffer header so we can count I/O's for the 804 * checkpoints and so we can release the allocated memory. 805 * 806 * XXX 807 * This should be removed if the new virtual memory system allows us to 808 * easily make the buffers contiguous in kernel memory and if that's 809 * fast enough. 
810 */ 811 #define LFS_CHUNKSIZE (56 * 1024) 812 ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize; 813 for (bpp = sp->bpp, i = nblocks; i;) { 814 num = ch_per_blk; 815 if (num > i) 816 num = i; 817 i -= num; 818 size = num * fs->lfs_bsize; 819 820 cbp = lfs_newbuf(fs, (*bpp)->b_blkno, 0); 821 cbp->b_dev = i_dev; 822 cbp->b_flags = B_ASYNC | B_BUSY | B_CALL; 823 cbp->b_iodone = lfs_callback; 824 cbp->b_saveaddr = cbp->b_un.b_addr; 825 cbp->b_un.b_addr = malloc(size, M_SEGMENT, M_WAITOK); 826 827 s = splbio(); 828 ++fs->lfs_iocount; 829 for (p = cbp->b_un.b_addr; num--;) { 830 bp = *bpp++; 831 bcopy(bp->b_un.b_addr, p, bp->b_bcount); 832 p += bp->b_bcount; 833 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI | 834 B_LOCKED | B_GATHERED); 835 if (!(bp->b_flags & (B_NOCACHE | B_INVAL))) { 836 bremfree(bp); 837 reassignbuf(bp, bp->b_vp); 838 } 839 brelse(bp); 840 } 841 splx(s); 842 cbp->b_bcount = p - cbp->b_un.b_addr; 843 vop_strategy_a.a_desc = VDESC(vop_strategy); 844 vop_strategy_a.a_bp = cbp; 845 (strategy)(&vop_strategy_a); 846 } 847 return (do_again); 848 } 849 850 void 851 lfs_writesuper(fs, sp) 852 struct lfs *fs; 853 struct segment *sp; 854 { 855 struct buf *bp; 856 dev_t i_dev; 857 int (*strategy) __P((struct vop_strategy_args *)); 858 struct vop_strategy_args vop_strategy_a; 859 860 #ifdef VERBOSE 861 printf("lfs_writesuper\n"); 862 #endif 863 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 864 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 865 866 /* Checksum the superblock and copy it into a buffer. */ 867 fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); 868 bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD); 869 *bp->b_un.b_lfs = *fs; 870 871 /* Write the first superblock (wait). 
*/ 872 bp->b_dev = i_dev; 873 bp->b_flags |= B_BUSY; 874 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 875 vop_strategy_a.a_desc = VDESC(vop_strategy); 876 vop_strategy_a.a_bp = bp; 877 (strategy)(&vop_strategy_a); 878 biowait(bp); 879 880 /* Write the second superblock (don't wait). */ 881 bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; 882 bp->b_flags |= B_ASYNC | B_BUSY; 883 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 884 (strategy)(&vop_strategy_a); 885 } 886 887 /* 888 * Logical block number match routines used when traversing the dirty block 889 * chain. 890 */ 891 int 892 lfs_match_data(fs, bp) 893 struct lfs *fs; 894 struct buf *bp; 895 { 896 return (bp->b_lblkno >= 0); 897 } 898 899 int 900 lfs_match_indir(fs, bp) 901 struct lfs *fs; 902 struct buf *bp; 903 { 904 int lbn; 905 906 lbn = bp->b_lblkno; 907 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 908 } 909 910 int 911 lfs_match_dindir(fs, bp) 912 struct lfs *fs; 913 struct buf *bp; 914 { 915 int lbn; 916 917 lbn = bp->b_lblkno; 918 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 919 } 920 921 int 922 lfs_match_tindir(fs, bp) 923 struct lfs *fs; 924 struct buf *bp; 925 { 926 int lbn; 927 928 lbn = bp->b_lblkno; 929 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 930 } 931 932 /* 933 * Allocate a new buffer header. 
/*
 * Allocate a new buffer header.
 */
struct buf *
lfs_newbuf(fs, daddr, size)
	struct lfs *fs;
	daddr_t daddr;
	size_t size;
{
	struct buf *bp;

#ifdef VERBOSE
	printf("lfs_newbuf\n");
#endif
	/* Take a header off the free list and attach it to the ifile vnode. */
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(fs->lfs_ivnode, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	/* size == 0 callers supply their own data area (see lfs_writeseg). */
	if (size)
		allocbuf(bp, size);
	/* Don't cache: the contents are only meaningful for this write. */
	bp->b_flags |= B_NOCACHE;
	bp->b_saveaddr = NULL;
	binshash(bp, &bfreelist[BQ_AGE]);
	return (bp);
}

/*
 * I/O-completion callback for segment chunk writes (B_CALL).  Decrements
 * the file system's outstanding-I/O count, waking any checkpoint waiting
 * for it to drain, and frees the chunk memory lfs_writeseg() allocated
 * (b_saveaddr holds the header's original data pointer).
 */
void
lfs_callback(bp)
	struct buf *bp;
{
	struct lfs *fs;

	fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
#ifdef DIAGNOSTIC
	if (fs->lfs_iocount == 0)
		panic("lfs_callback: zero iocount\n");
#endif
	if (--fs->lfs_iocount == 0)
		wakeup(&fs->lfs_iocount);

	if (bp->b_saveaddr) {
		/* Restore the original data area before releasing. */
		free(bp->b_un.b_addr, M_SEGMENT);
		bp->b_un.b_addr = bp->b_saveaddr;
		bp->b_saveaddr = NULL;
	}
	brelse(bp);
}

/*
 * Shellsort (diminishing increment sort) from Data Structures and
 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
 * see also Knuth Vol. 3, page 84.  The increments are selected from
 * formula (8), page 95.  Roughly O(N^3/2).
 */
/*
 * This is our own private copy of shellsort because we want to sort
 * two parallel arrays (the array of buffer pointers and the array of
 * logical block numbers) simultaneously.  Note that we cast the array
 * of logical block numbers to a unsigned in this routine so that the
 * negative block numbers (meta data blocks) sort AFTER the data blocks.
/*
 * Sort the parallel arrays of buffer pointers and logical block numbers
 * in place, in ascending unsigned order of block number.
 */
void
lfs_shellsort(bp_array, lb_array, nmemb)
	struct buf **bp_array;
	daddr_t *lb_array;
	register int nmemb;
{
	/* Renamed from __rsshell_increments: double-underscore names are
	 * reserved for the implementation. */
	static int shell_increments[] = { 4, 1, 0 };
	register int incr, *incrp, t1, t2;
	struct buf *bp_temp;
	u_long lb_temp;

	for (incrp = shell_increments; incr = *incrp++;)
		for (t1 = incr; t1 < nmemb; ++t1)
			for (t2 = t1 - incr; t2 >= 0;)
				/*
				 * Compare as unsigned, as the comment above
				 * promises, so negative (metadata) block
				 * numbers sort AFTER the data blocks; the
				 * previous signed compare sorted them first.
				 */
				if ((u_long)lb_array[t2] >
				    (u_long)lb_array[t2 + incr]) {
					lb_temp = lb_array[t2];
					lb_array[t2] = lb_array[t2 + incr];
					lb_array[t2 + incr] = lb_temp;
					bp_temp = bp_array[t2];
					bp_array[t2] = bp_array[t2 + incr];
					bp_array[t2 + incr] = bp_temp;
					t2 -= incr;
				} else
					break;
}